Compare commits

...

2 Commits

Author SHA1 Message Date
446a27c040 refactor md parser, TODO: parse_str 2025-11-14 02:22:51 +02:00
d74613aa93 added gitea cargo test workflow 2025-11-09 21:16:54 +02:00
8 changed files with 405 additions and 42 deletions

View File

@@ -0,0 +1,23 @@
# Builds the crate and runs its test suite on pushes and pull requests targeting master.
name: Test
on:
  push:
    branches: [ "master" ]
  pull_request:
    branches: [ "master" ]
env:
  CARGO_TERM_COLOR: always
jobs:
  build:
    name: Cargo test
    runs-on: ubuntu-latest
    steps:
      # A step may carry either `uses` or `run`, never both, so the checkout
      # gets its own step and must come before anything that needs the sources.
      - name: Checkout
        uses: actions/checkout@v4
      - name: Build
        run: cargo build --verbose
      - name: Run tests
        run: cargo test --verbose

View File

@@ -171,8 +171,11 @@ mod convert_md_to_html_test {
#[test]
fn single_header() {
let md = "# Header 1";
let html = parse(md).to_html();
let ast = match parse(md) {
Ok(a) => a,
Err(e) => panic!("{}", e),
};
let html = ast.to_html();
assert_eq!(
html,
@@ -180,11 +183,22 @@ mod convert_md_to_html_test {
);
}
#[test]
fn single_header_wrong_format() {
let md = "#Whoops";
let ast = parse(md);
assert!(ast.is_err());
}
#[test]
fn nested_bold_headers_and_nested_code_paragraph() {
let md = "# *Bold* header 1\n## Header 2\nrun `sudo rm -rf /` on your computer";
let html = parse(md).to_html();
let ast = match parse(md) {
Ok(a) => a,
Err(e) => panic!("{}", e),
};
let html = ast.to_html();
assert_eq!(
html,
@@ -192,3 +206,21 @@ mod convert_md_to_html_test {
);
}
}
#[cfg(test)]
mod parse_real_md {
    use crate::parser::parse;
    use std::fs;

    /// Parses the `./test.md` fixture; on a parse error the test panics with the
    /// formatted error, tagged with the fixture's path.
    #[test]
    fn go() {
        let file = "./test.md";
        let md = fs::read_to_string(file).expect("reading ./test.md failed");
        if let Err(e) = parse(&md) {
            panic!("{}", e.set_file(file.into()));
        }
    }
}

View File

@@ -1,4 +1,4 @@
#![deny(dead_code, unused_imports)]
#![deny(unused_imports)]
use fstools::crawl_fs;
use parser::parse;
@@ -11,9 +11,91 @@ use std::{
use to_html::ToHtml;
pub mod ast;
mod parse_trait;
pub mod parser;
pub mod to_html;
/// Error describing a spot where the markdown input did not match what the
/// parser expected.
///
/// The file and line are optional and can be attached after construction with
/// [`MdParseError::set_file`] and [`MdParseError::set_line`].
#[derive(Debug)]
pub struct MdParseError {
    /// File the error belongs to, if one was attached.
    file: Option<PathBuf>,
    /// Line the error belongs to, if one was attached.
    line: Option<usize>,
    // Column tracking is not implemented yet.
    /// What the parser expected to see.
    expected: String,
    /// What the parser saw instead.
    got: String,
}

impl MdParseError {
    /// Creates an error with no file or line attached.
    pub fn new(expected: impl ToString, got: impl ToString) -> Self {
        Self {
            file: None,
            line: None,
            expected: expected.to_string(),
            got: got.to_string(),
        }
    }

    /// Creates an error that already carries its line number.
    pub fn from_line(line: usize, expected: impl ToString, got: impl ToString) -> Self {
        Self::new(expected, got).set_line(line)
    }

    /// Returns the error with its line replaced by `line`.
    pub fn set_line(mut self, line: usize) -> Self {
        self.line = Some(line);
        self
    }

    /// Returns the error with its file replaced by `file`.
    pub fn set_file(mut self, file: PathBuf) -> Self {
        self.file = Some(file);
        self
    }
}

impl Display for MdParseError {
    /// Renders the error on one line. A missing file is shown as `<unknown>`
    /// and a missing line as `0`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // There is no free-form message field; the report is assembled from
        // the expected/got pair.
        let file = self
            .file
            .as_deref()
            .unwrap_or(std::path::Path::new("<unknown>"));
        write!(
            f,
            "Parse error in '{}' on line {}: expected '{}', got '{}'",
            file.display(),
            self.line.unwrap_or(0),
            self.expected,
            self.got
        )
    }
}

impl std::error::Error for MdParseError {}
#[derive(Debug)]
pub enum Error {
OutDirIsNotEmpty,
@@ -24,6 +106,7 @@ pub enum Error {
FileWrite,
FileCreate,
DirCreate,
Parse(MdParseError),
}
impl Display for Error {
@@ -32,6 +115,12 @@ impl Display for Error {
}
}
impl From<MdParseError> for Error {
fn from(value: MdParseError) -> Self {
Error::Parse(value)
}
}
impl std::error::Error for Error {}
type Result<T> = std::result::Result<T, crate::Error>;
@@ -44,7 +133,7 @@ pub fn generate(indir: &PathBuf, outdir: &PathBuf, force: bool) -> Result<()> {
// read and parse md file
let content = fs::read_to_string(&fullpath).map_err(|_e| Error::FileRead)?;
let html = parse(&content).to_html();
let html = parse(&content)?.to_html();
// write html data to file
let mut newpath = outdir.to_owned();

View File

@@ -0,0 +1,111 @@
/*
use crate::MdParseError;
pub type Pattern<T> = Vec<PatternToken<T>>;
pub enum PatternToken<T> {
Once(T),
Optional(T),
AtLeastOnce(T),
NTimes(T),
}
/// panics: on invalid pattern
pub fn char_pattern(s: &str) -> Pattern<char> {
let mut s_chars = s.chars().peekable();
let mut pat: Pattern<char> = Vec::new();
while let Some(token) = s_chars.next() {
pat.push(if let Some(&next) = s_chars.peek() {
match next {
'?' => {
s_chars.next().unwrap();
PatternToken::Optional(token)
}
'+' => {
s_chars.next().unwrap();
PatternToken::AtLeastOnce(token)
}
'*' => {
s_chars.next().unwrap();
PatternToken::NTimes(token)
}
_ => PatternToken::Once(token),
}
} else {
PatternToken::Once(token)
});
}
pat
}
pub trait ParsePattern: Iterator + Clone {
fn parse<T>(&mut self, expect: Pattern<T>) -> Result<Vec<Self::Item>, MdParseError>
where
T: PartialEq<<Self as Iterator>::Item>,
{
let mut consumed = Vec::new();
let mut cloned = self.clone();
for pat_token in expect {
match pat_token {
PatternToken::Once(c) => {
if !cloned.next().map(|v| c == v).unwrap_or(false) {
return None;
}
}
PatternToken::Optional(c) => if cloned.peek().map(|v| c == *v).unwrap_or(false) {},
}
}
*self = cloned;
Some(consumed)
}
}
*/
/// Small lookahead helpers for the character iterators used by the markdown parser.
pub trait Parse: Iterator {
    /// Returns `true` when the next item is `token`. Must not consume anything.
    fn follows(&mut self, token: char) -> bool;

    /// Consumes the next item if it is `token` and reports whether it did.
    fn parse_token(&mut self, token: char) -> bool {
        let matched = self.follows(token);
        if matched {
            let _ = self.next();
        }
        matched
    }

    /// Consumes `tokens` if the upcoming items spell it out exactly.
    ///
    /// All-or-nothing: on a mismatch the iterator is left untouched, so the
    /// caller can try another alternative. An empty `tokens` always matches
    /// and consumes nothing.
    fn parse_str(&mut self, tokens: &str) -> bool
    where
        Self: Sized + Clone,
    {
        // Match against a copy so a partial match cannot eat input.
        let mut lookahead = self.clone();
        if tokens.chars().all(|token| lookahead.parse_token(token)) {
            *self = lookahead;
            true
        } else {
            false
        }
    }
}

impl Parse for std::iter::Peekable<std::str::Chars<'_>> {
    fn follows(&mut self, token: char) -> bool {
        self.peek() == Some(&token)
    }
}

impl Parse for std::iter::Peekable<std::iter::Enumerate<std::str::Chars<'_>>> {
    fn follows(&mut self, token: char) -> bool {
        // Peek on `self` directly. Wrapping it in another `peekable()` would
        // pull an item out of the underlying iterator and then drop it.
        matches!(self.peek(), Some(&(_, c)) if c == token)
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// Asking `follows` about the same token twice in a row must succeed both times.
    #[test]
    fn chars_parse_follows_double() {
        let mut cursor = "abc".chars().peekable();
        for _ in 0..2 {
            assert!(cursor.follows('a'));
        }
    }
}

View File

@@ -3,14 +3,15 @@ mod inline;
use block::parse_blocks;
use crate::ast::Document;
use crate::{MdParseError, ast::Document};
pub fn parse(s: &str) -> Document {
Document {
blocks: parse_blocks(s),
}
pub fn parse(s: &str) -> Result<Document, MdParseError> {
Ok(Document {
blocks: parse_blocks(s)?,
})
}
/*
#[cfg(test)]
mod test {
use crate::ast::*;
@@ -145,3 +146,4 @@ mod test {
);
}
}
// */

View File

@@ -1,22 +1,106 @@
use crate::ast::Block;
use super::inline::parse_inlines;
use crate::{MdParseError, ast::Block};
pub fn parse_blocks(input: &str) -> Vec<Block> {
use crate::parse_trait::Parse;
/// Parses `input` line by line into block-level nodes.
///
/// Handled so far: headings (one or more `#` followed by a space), fenced code
/// blocks delimited by "```" lines, and paragraphs (any other non-blank line).
/// Quotes and lists are still TODO.
///
/// # Errors
/// Returns an [`MdParseError`] carrying a 1-based line number when
/// - a heading's `#` run is not followed by a space,
/// - a closing code fence has trailing text,
/// - a code fence is never closed (reported on the opening line), or
/// - the inline content of a heading or paragraph fails to parse.
pub fn parse_blocks(input: &str) -> Result<Vec<Block>, MdParseError> {
    let mut blocks = Vec::new();
    let mut lines = input.lines().enumerate();

    while let Some((i, line)) = lines.next() {
        let line_no = i + 1;
        let mut line_chars = line.chars().peekable();

        // header: every leading '#' is consumed, but the level is capped at 6
        let mut heading_level = 0;
        while line_chars.parse_token('#') {
            if heading_level < 6 {
                heading_level += 1;
            }
        }
        if heading_level > 0 {
            if !line_chars.parse_token(' ') {
                return Err(MdParseError::from_line(
                    line_no,
                    "<space> after #",
                    "no <space>",
                ));
            }
            let line_content: String = line_chars.collect();
            blocks.push(Block::Heading {
                level: heading_level,
                // Inline errors carry no position of their own; tag them here.
                content: parse_inlines(&line_content).map_err(|e| e.set_line(line_no))?,
            });
            continue;
        }

        // quote TODO
        /*
        if line_chars.parse_str("> ") {
            let content: String = line_chars.collect();
            let quote_blocks = parse_blocks(&content).map_err(|e| e.set_line(i + 1))?;
            blocks.push(Block::Quote(quote_blocks));
            continue;
        }
        */

        // code
        if line_chars.parse_str("```") {
            // Whatever follows the opening fence is the language tag.
            let lang_line: String = line_chars.collect();
            let language = Some(lang_line).filter(|l| !l.is_empty());

            let mut code = String::new();
            let mut terminated = false;
            for (j, code_line) in lines.by_ref() {
                let mut code_line_chars = code_line.chars().peekable();
                if code_line_chars.parse_str("```") {
                    // code block end: nothing may follow the closing fence
                    let remaining: String = code_line_chars.collect();
                    if !remaining.is_empty() {
                        return Err(MdParseError::from_line(
                            j + 1,
                            "```",
                            format!("```{}", remaining),
                        ));
                    }
                    terminated = true;
                    break;
                }
                code.push_str(code_line);
                code.push('\n');
            }

            // Only complain when the input ran out before a closing fence was seen.
            if !terminated {
                return Err(MdParseError::from_line(
                    line_no,
                    "a terminating '```'",
                    "",
                ));
            }
            blocks.push(Block::Code {
                language,
                content: code,
            });
            continue;
        }

        // lists TODO

        // paragraph: blank lines only separate blocks and produce nothing
        if line.trim().is_empty() {
            continue;
        }
        blocks.push(Block::Paragraph(
            parse_inlines(line).map_err(|e| e.set_line(line_no))?,
        ));
    }

    Ok(blocks)
}
/*
pub fn parse_blocks(input: &str) -> Result<Vec<Block>, MdParseError> {
let mut blocks = Vec::new();
let mut lines = input.lines().peekable();
let mut lines = input.lines().enumerate().peekable();
while let Some(line) = lines.next() {
while let Some((i, line)) = lines.next() {
if line.starts_with("#") {
let level = line.chars().take_while(|&c| c == '#').count() as u8;
let text = line[level as usize..].trim();
blocks.push(Block::Heading {
level,
content: parse_inlines(text),
content: parse_inlines(text).map_err(|e| e.set_line(i + 1))?,
});
} else if let Some(quote_body) = line.strip_prefix(">") {
let quote_blocks = parse_blocks(quote_body);
let quote_blocks = parse_blocks(quote_body).map_err(|e| e.set_line(i + 1))?;
blocks.push(Block::Quote(quote_blocks));
} else if line.starts_with("```") {
let lang_line = line.strip_prefix("```").unwrap().to_string();
@@ -26,8 +110,16 @@ pub fn parse_blocks(input: &str) -> Vec<Block> {
Some(lang_line)
};
let mut code = String::new();
while lines.peek().is_some() && !lines.peek().unwrap().starts_with("```") {
code.push_str(&format!("{}\n", lines.next().unwrap()));
while lines.peek().is_some()
&& !lines
.peek()
.ok_or(MdParseError::from_line(i + 1, "a line", ""))?
.1
.starts_with("```")
{
if let Some((_i, l)) = lines.next() {
code.push_str(&format!("{}\n", l));
}
}
lines.next();
blocks.push(Block::Code {
@@ -37,9 +129,12 @@ pub fn parse_blocks(input: &str) -> Vec<Block> {
} else if line.trim().is_empty() {
continue;
} else {
blocks.push(Block::Paragraph(parse_inlines(line)));
blocks.push(Block::Paragraph(
parse_inlines(line).map_err(|e| e.set_line(i + 1))?,
));
}
}
blocks
Ok(blocks)
}
*/

View File

@@ -1,61 +1,65 @@
use crate::ast::Inline;
use crate::{MdParseError, ast::Inline};
pub fn parse_inlines(input: &str) -> Vec<Inline> {
pub fn parse_inlines(input: &str) -> Result<Vec<Inline>, MdParseError> {
let mut inlines = Vec::new();
let mut chars = input.chars().peekable();
while let Some(c) = chars.next() {
match c {
'*' => {
let inner = collect_until(&mut chars, '*');
inlines.push(Inline::Bold(parse_inlines(&inner)));
let inner = collect_until(&mut chars, '*')?;
inlines.push(Inline::Bold(parse_inlines(&inner)?));
}
'_' => {
let inner = collect_until(&mut chars, '_');
inlines.push(Inline::Italic(parse_inlines(&inner)));
let inner = collect_until(&mut chars, '_')?;
inlines.push(Inline::Italic(parse_inlines(&inner)?));
}
'`' => {
let code = collect_until(&mut chars, '`');
let code = collect_until(&mut chars, '`')?;
inlines.push(Inline::Code(code));
}
'[' => {
let text = collect_until(&mut chars, ']');
if chars.next() == Some('(') {
let href = collect_until(&mut chars, ')');
let text = collect_until(&mut chars, ']')?;
if let Some('(') = chars.next() {
let href = collect_until(&mut chars, ')')?;
inlines.push(Inline::Link {
text: parse_inlines(&text),
text: parse_inlines(&text)?,
href,
});
} else {
Err(MdParseError::new(
"(<href>)",
chars.next().unwrap_or_default(),
))?;
}
}
_ => {
let mut text = String::new();
text.push(c);
while let Some(&nc) = chars.peek() {
while let Some(nc) = chars.next() {
if matches!(nc, '*' | '_' | '`' | '[') {
break;
}
text.push(chars.next().unwrap());
text.push(nc);
}
inlines.push(Inline::Text(text));
}
}
}
inlines
Ok(inlines)
}
fn collect_until<I: Iterator<Item = char>>(
chars: &mut std::iter::Peekable<I>,
end: char,
) -> String {
) -> Result<String, MdParseError> {
let mut s = String::new();
while let Some(&c) = chars.peek() {
while let Some(c) = chars.next() {
if c == end {
chars.next();
break;
return Ok(s);
}
s.push(chars.next().unwrap());
s.push(c);
}
s
Err(MdParseError::new(end, ""))
}

7
cracked_md/test.md Normal file
View File

@@ -0,0 +1,7 @@
# Header *1kkkkkkkkkkkkkkkkkkkkkk*
this is some code: `abc
```code
oiajwefoijao089uaoisdjfoijasdfoijasdofij