Compare commits
2 Commits
dbaa2feb48
...
446a27c040
| Author | SHA1 | Date | |
|---|---|---|---|
| 446a27c040 | |||
| d74613aa93 |
23
.gitea/workflows/cargo-test.yml
Normal file
23
.gitea/workflows/cargo-test.yml
Normal file
@@ -0,0 +1,23 @@
|
||||
# CI workflow: builds the crate and runs its tests for pushes and pull
# requests that target `master`.
name: Test

on:
  push:
    branches: [ "master" ]
  pull_request:
    branches: [ "master" ]

env:
  CARGO_TERM_COLOR: always

jobs:
  build:
    name: Cargo test
    runs-on: ubuntu-latest

    steps:
      # A step may define either `uses` or `run`, never both, so the checkout
      # gets its own step ahead of the cargo invocations.
      - name: Checkout
        uses: actions/checkout@v4

      - name: Build
        run: cargo build --verbose

      - name: Run tests
        run: cargo test --verbose
|
||||
@@ -171,8 +171,11 @@ mod convert_md_to_html_test {
|
||||
#[test]
|
||||
fn single_header() {
|
||||
let md = "# Header 1";
|
||||
|
||||
let html = parse(md).to_html();
|
||||
let ast = match parse(md) {
|
||||
Ok(a) => a,
|
||||
Err(e) => panic!("{}", e),
|
||||
};
|
||||
let html = ast.to_html();
|
||||
|
||||
assert_eq!(
|
||||
html,
|
||||
@@ -180,11 +183,22 @@ mod convert_md_to_html_test {
|
||||
);
|
||||
}
|
||||
|
||||
/// A `#` that is not followed by a space must be reported as a parse error.
#[test]
fn single_header_wrong_format() {
    let result = parse("#Whoops");

    assert!(result.is_err());
}
|
||||
|
||||
#[test]
|
||||
fn nested_bold_headers_and_nested_code_paragraph() {
|
||||
let md = "# *Bold* header 1\n## Header 2\nrun `sudo rm -rf /` on your computer";
|
||||
|
||||
let html = parse(md).to_html();
|
||||
let ast = match parse(md) {
|
||||
Ok(a) => a,
|
||||
Err(e) => panic!("{}", e),
|
||||
};
|
||||
let html = ast.to_html();
|
||||
|
||||
assert_eq!(
|
||||
html,
|
||||
@@ -192,3 +206,21 @@ mod convert_md_to_html_test {
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod parse_real_md {
    use std::fs;

    use crate::parser::parse;

    /// Reads `./test.md` from disk and panics with the formatted parse error
    /// (tagged with the file name) if the document cannot be parsed.
    #[test]
    fn go() {
        let file = "./test.md";
        let md = fs::read_to_string(file).expect("reading ./test.md failed");

        if let Err(e) = parse(&md) {
            panic!("{}", e.set_file(file.into()));
        }
    }
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#![deny(dead_code, unused_imports)]
|
||||
#![deny(unused_imports)]
|
||||
|
||||
use fstools::crawl_fs;
|
||||
use parser::parse;
|
||||
@@ -11,9 +11,91 @@ use std::{
|
||||
use to_html::ToHtml;
|
||||
|
||||
pub mod ast;
|
||||
mod parse_trait;
|
||||
pub mod parser;
|
||||
pub mod to_html;
|
||||
|
||||
/// Error describing markdown input that did not match what the parser expected.
///
/// Holds an optional source file and line number plus a textual description
/// of what was expected and what was found instead.
#[derive(Debug)]
pub struct MdParseError {
    file: Option<PathBuf>,
    line: Option<usize>,
    // TODO: also track the column (`col: Option<usize>`).
    expected: String,
    got: String,
}

impl MdParseError {
    /// Creates an error with no file or line attached.
    ///
    /// `expected` and `got` are stringified immediately via `ToString`.
    pub fn new(expected: impl ToString, got: impl ToString) -> Self {
        Self {
            file: None,
            line: None,
            expected: expected.to_string(),
            got: got.to_string(),
        }
    }

    /// Creates an error that already carries `line`; the file stays unset.
    pub fn from_line(line: usize, expected: impl ToString, got: impl ToString) -> Self {
        Self::new(expected, got).set_line(line)
    }

    /// Returns the error with its line set to `line`, overwriting any previous value.
    pub fn set_line(self, line: usize) -> Self {
        Self {
            line: Some(line),
            ..self
        }
    }

    /// Returns the error with its file set to `file`, overwriting any previous value.
    pub fn set_file(self, file: PathBuf) -> Self {
        Self {
            file: Some(file),
            ..self
        }
    }
}

impl Display for MdParseError {
    /// Formats the error as a single line.
    ///
    /// A missing file is printed as `<unknown>` and a missing line as `0`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Borrow the stored path instead of cloning it just to print it.
        let file = self
            .file
            .as_deref()
            .unwrap_or(std::path::Path::new("<unknown>"));
        write!(
            f,
            "Parse error in '{}' on line {}: expected '{}', got '{}'",
            file.display(),
            self.line.unwrap_or(0),
            self.expected,
            self.got
        )
    }
}

impl std::error::Error for MdParseError {}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
OutDirIsNotEmpty,
|
||||
@@ -24,6 +106,7 @@ pub enum Error {
|
||||
FileWrite,
|
||||
FileCreate,
|
||||
DirCreate,
|
||||
Parse(MdParseError),
|
||||
}
|
||||
|
||||
impl Display for Error {
|
||||
@@ -32,6 +115,12 @@ impl Display for Error {
|
||||
}
|
||||
}
|
||||
|
||||
/// Wraps a markdown parse failure in the crate-level [`Error`], which lets
/// `?` convert a `MdParseError` automatically in functions returning `Error`.
impl From<MdParseError> for Error {
    fn from(err: MdParseError) -> Self {
        Self::Parse(err)
    }
}
|
||||
|
||||
impl std::error::Error for Error {}
|
||||
|
||||
type Result<T> = std::result::Result<T, crate::Error>;
|
||||
@@ -44,7 +133,7 @@ pub fn generate(indir: &PathBuf, outdir: &PathBuf, force: bool) -> Result<()> {
|
||||
|
||||
// read and parse md file
|
||||
let content = fs::read_to_string(&fullpath).map_err(|_e| Error::FileRead)?;
|
||||
let html = parse(&content).to_html();
|
||||
let html = parse(&content)?.to_html();
|
||||
|
||||
// write html data to file
|
||||
let mut newpath = outdir.to_owned();
|
||||
|
||||
111
cracked_md/src/parse_trait.rs
Normal file
111
cracked_md/src/parse_trait.rs
Normal file
@@ -0,0 +1,111 @@
|
||||
/*
|
||||
|
||||
use crate::MdParseError;
|
||||
|
||||
pub type Pattern<T> = Vec<PatternToken<T>>;
|
||||
|
||||
pub enum PatternToken<T> {
|
||||
Once(T),
|
||||
Optional(T),
|
||||
AtLeastOnce(T),
|
||||
NTimes(T),
|
||||
}
|
||||
|
||||
/// panics: on invalid pattern
|
||||
pub fn char_pattern(s: &str) -> Pattern<char> {
|
||||
let mut s_chars = s.chars().peekable();
|
||||
let mut pat: Pattern<char> = Vec::new();
|
||||
while let Some(token) = s_chars.next() {
|
||||
pat.push(if let Some(&next) = s_chars.peek() {
|
||||
match next {
|
||||
'?' => {
|
||||
s_chars.next().unwrap();
|
||||
PatternToken::Optional(token)
|
||||
}
|
||||
'+' => {
|
||||
s_chars.next().unwrap();
|
||||
PatternToken::AtLeastOnce(token)
|
||||
}
|
||||
'*' => {
|
||||
s_chars.next().unwrap();
|
||||
PatternToken::NTimes(token)
|
||||
}
|
||||
_ => PatternToken::Once(token),
|
||||
}
|
||||
} else {
|
||||
PatternToken::Once(token)
|
||||
});
|
||||
}
|
||||
pat
|
||||
}
|
||||
|
||||
pub trait ParsePattern: Iterator + Clone {
|
||||
fn parse<T>(&mut self, expect: Pattern<T>) -> Result<Vec<Self::Item>, MdParseError>
|
||||
where
|
||||
T: PartialEq<<Self as Iterator>::Item>,
|
||||
{
|
||||
let mut consumed = Vec::new();
|
||||
let mut cloned = self.clone();
|
||||
|
||||
for pat_token in expect {
|
||||
match pat_token {
|
||||
PatternToken::Once(c) => {
|
||||
if !cloned.next().map(|v| c == v).unwrap_or(false) {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
PatternToken::Optional(c) => if cloned.peek().map(|v| c == *v).unwrap_or(false) {},
|
||||
}
|
||||
}
|
||||
|
||||
*self = cloned;
|
||||
|
||||
Some(consumed)
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
/// Small lookahead helpers for the character iterators used by the parser.
pub trait Parse: Iterator {
    /// Returns `true` if the next item is `token`. Must not consume anything.
    fn follows(&mut self, token: char) -> bool;

    /// Consumes the next item if it is `token`.
    ///
    /// Returns `true` when the token was consumed, `false` (leaving the
    /// iterator untouched) otherwise.
    fn parse_token(&mut self, token: char) -> bool {
        let matched = self.follows(token);
        if matched {
            let _ = self.next();
        }
        matched
    }

    /// Consumes `tokens` if the upcoming items spell it out exactly.
    ///
    /// Matching runs on a clone of the iterator, and the clone is only
    /// committed back on a full match, so a partial match consumes nothing.
    /// An empty `tokens` trivially matches and consumes nothing.
    fn parse_str(&mut self, tokens: &str) -> bool
    where
        Self: Clone,
    {
        let mut lookahead = self.clone();
        if tokens.chars().all(|token| lookahead.parse_token(token)) {
            *self = lookahead;
            true
        } else {
            false
        }
    }
}

impl Parse for std::iter::Peekable<std::str::Chars<'_>> {
    fn follows(&mut self, token: char) -> bool {
        self.peek() == Some(&token)
    }
}

impl Parse for std::iter::Peekable<std::iter::Enumerate<std::str::Chars<'_>>> {
    fn follows(&mut self, token: char) -> bool {
        // Peek on `self` directly. Calling `self.peekable()` here would wrap
        // `&mut self` in a second `Peekable`, whose `peek` pulls the item out
        // of this iterator and then drops it, silently consuming a character.
        matches!(self.peek(), Some(&(_, c)) if c == token)
    }
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    /// `follows` only peeks, so asking twice for the same leading character
    /// has to succeed both times.
    #[test]
    fn chars_parse_follows_double() {
        let mut chars = "abc".chars().peekable();

        for _ in 0..2 {
            assert!(chars.follows('a'));
        }
    }
}
|
||||
@@ -3,14 +3,15 @@ mod inline;
|
||||
|
||||
use block::parse_blocks;
|
||||
|
||||
use crate::ast::Document;
|
||||
use crate::{MdParseError, ast::Document};
|
||||
|
||||
pub fn parse(s: &str) -> Document {
|
||||
Document {
|
||||
blocks: parse_blocks(s),
|
||||
}
|
||||
pub fn parse(s: &str) -> Result<Document, MdParseError> {
|
||||
Ok(Document {
|
||||
blocks: parse_blocks(s)?,
|
||||
})
|
||||
}
|
||||
|
||||
/*
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::ast::*;
|
||||
@@ -145,3 +146,4 @@ mod test {
|
||||
);
|
||||
}
|
||||
}
|
||||
// */
|
||||
|
||||
@@ -1,22 +1,106 @@
|
||||
use crate::ast::Block;
|
||||
|
||||
use super::inline::parse_inlines;
|
||||
use crate::{MdParseError, ast::Block};
|
||||
|
||||
pub fn parse_blocks(input: &str) -> Vec<Block> {
|
||||
use crate::parse_trait::Parse;
|
||||
|
||||
pub fn parse_blocks(input: &str) -> Result<Vec<Block>, MdParseError> {
|
||||
let mut blocks = Vec::new();
|
||||
let mut lines = input.lines().enumerate().peekable();
|
||||
|
||||
while let Some((i, line)) = lines.next() {
|
||||
let mut line_chars = line.chars().peekable();
|
||||
|
||||
// header
|
||||
let mut heading_level = 0;
|
||||
while line_chars.parse_token('#') {
|
||||
if heading_level < 6 {
|
||||
heading_level += 1;
|
||||
}
|
||||
}
|
||||
if heading_level > 0 {
|
||||
if !line_chars.parse_token(' ') {
|
||||
Err(MdParseError::from_line(
|
||||
i + 1,
|
||||
"<space> after #",
|
||||
"no <space>",
|
||||
))?;
|
||||
}
|
||||
let line_content: String = line_chars.collect();
|
||||
blocks.push(Block::Heading {
|
||||
level: heading_level,
|
||||
content: parse_inlines(&line_content)?,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
// quote TODO
|
||||
/*
|
||||
if line_chars.parse_str("> ") {
|
||||
let content: String = line_chars.collect();
|
||||
let quote_blocks = parse_blocks(&content).map_err(|e| e.set_line(i + 1))?;
|
||||
blocks.push(Block::Quote(quote_blocks));
|
||||
continue;
|
||||
}
|
||||
*/
|
||||
|
||||
// code
|
||||
if line_chars.parse_str("```") {
|
||||
let lang_line: String = line_chars.collect();
|
||||
let lang = if lang_line.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(lang_line)
|
||||
};
|
||||
let mut code = String::new();
|
||||
|
||||
for (j, line) in lines.by_ref() {
|
||||
let mut code_line_chars = line.chars().peekable();
|
||||
// code block end
|
||||
if code_line_chars.parse_str("```") {
|
||||
let remaining: String = code_line_chars.collect();
|
||||
if remaining.is_empty() {
|
||||
blocks.push(Block::Code {
|
||||
language: lang,
|
||||
content: code,
|
||||
});
|
||||
break;
|
||||
} else {
|
||||
Err(MdParseError::from_line(
|
||||
j + 1,
|
||||
"```",
|
||||
format!("```{}", remaining),
|
||||
))?;
|
||||
}
|
||||
} else {
|
||||
code.push_str(line);
|
||||
code.push('\n');
|
||||
}
|
||||
}
|
||||
Err(MdParseError::from_line(i + 1, "a terminating '```'", ""))?;
|
||||
}
|
||||
|
||||
// lists TODO
|
||||
}
|
||||
|
||||
Ok(blocks)
|
||||
}
|
||||
|
||||
/*
|
||||
pub fn parse_blocks(input: &str) -> Result<Vec<Block>, MdParseError> {
|
||||
let mut blocks = Vec::new();
|
||||
|
||||
let mut lines = input.lines().peekable();
|
||||
let mut lines = input.lines().enumerate().peekable();
|
||||
|
||||
while let Some(line) = lines.next() {
|
||||
while let Some((i, line)) = lines.next() {
|
||||
if line.starts_with("#") {
|
||||
let level = line.chars().take_while(|&c| c == '#').count() as u8;
|
||||
let text = line[level as usize..].trim();
|
||||
blocks.push(Block::Heading {
|
||||
level,
|
||||
content: parse_inlines(text),
|
||||
content: parse_inlines(text).map_err(|e| e.set_line(i + 1))?,
|
||||
});
|
||||
} else if let Some(quote_body) = line.strip_prefix(">") {
|
||||
let quote_blocks = parse_blocks(quote_body);
|
||||
let quote_blocks = parse_blocks(quote_body).map_err(|e| e.set_line(i + 1))?;
|
||||
blocks.push(Block::Quote(quote_blocks));
|
||||
} else if line.starts_with("```") {
|
||||
let lang_line = line.strip_prefix("```").unwrap().to_string();
|
||||
@@ -26,8 +110,16 @@ pub fn parse_blocks(input: &str) -> Vec<Block> {
|
||||
Some(lang_line)
|
||||
};
|
||||
let mut code = String::new();
|
||||
while lines.peek().is_some() && !lines.peek().unwrap().starts_with("```") {
|
||||
code.push_str(&format!("{}\n", lines.next().unwrap()));
|
||||
while lines.peek().is_some()
|
||||
&& !lines
|
||||
.peek()
|
||||
.ok_or(MdParseError::from_line(i + 1, "a line", ""))?
|
||||
.1
|
||||
.starts_with("```")
|
||||
{
|
||||
if let Some((_i, l)) = lines.next() {
|
||||
code.push_str(&format!("{}\n", l));
|
||||
}
|
||||
}
|
||||
lines.next();
|
||||
blocks.push(Block::Code {
|
||||
@@ -37,9 +129,12 @@ pub fn parse_blocks(input: &str) -> Vec<Block> {
|
||||
} else if line.trim().is_empty() {
|
||||
continue;
|
||||
} else {
|
||||
blocks.push(Block::Paragraph(parse_inlines(line)));
|
||||
blocks.push(Block::Paragraph(
|
||||
parse_inlines(line).map_err(|e| e.set_line(i + 1))?,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
blocks
|
||||
Ok(blocks)
|
||||
}
|
||||
*/
|
||||
|
||||
@@ -1,61 +1,65 @@
|
||||
use crate::ast::Inline;
|
||||
use crate::{MdParseError, ast::Inline};
|
||||
|
||||
pub fn parse_inlines(input: &str) -> Vec<Inline> {
|
||||
pub fn parse_inlines(input: &str) -> Result<Vec<Inline>, MdParseError> {
|
||||
let mut inlines = Vec::new();
|
||||
let mut chars = input.chars().peekable();
|
||||
|
||||
while let Some(c) = chars.next() {
|
||||
match c {
|
||||
'*' => {
|
||||
let inner = collect_until(&mut chars, '*');
|
||||
inlines.push(Inline::Bold(parse_inlines(&inner)));
|
||||
let inner = collect_until(&mut chars, '*')?;
|
||||
inlines.push(Inline::Bold(parse_inlines(&inner)?));
|
||||
}
|
||||
'_' => {
|
||||
let inner = collect_until(&mut chars, '_');
|
||||
inlines.push(Inline::Italic(parse_inlines(&inner)));
|
||||
let inner = collect_until(&mut chars, '_')?;
|
||||
inlines.push(Inline::Italic(parse_inlines(&inner)?));
|
||||
}
|
||||
'`' => {
|
||||
let code = collect_until(&mut chars, '`');
|
||||
let code = collect_until(&mut chars, '`')?;
|
||||
inlines.push(Inline::Code(code));
|
||||
}
|
||||
'[' => {
|
||||
let text = collect_until(&mut chars, ']');
|
||||
if chars.next() == Some('(') {
|
||||
let href = collect_until(&mut chars, ')');
|
||||
let text = collect_until(&mut chars, ']')?;
|
||||
if let Some('(') = chars.next() {
|
||||
let href = collect_until(&mut chars, ')')?;
|
||||
inlines.push(Inline::Link {
|
||||
text: parse_inlines(&text),
|
||||
text: parse_inlines(&text)?,
|
||||
href,
|
||||
});
|
||||
} else {
|
||||
Err(MdParseError::new(
|
||||
"(<href>)",
|
||||
chars.next().unwrap_or_default(),
|
||||
))?;
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
let mut text = String::new();
|
||||
text.push(c);
|
||||
while let Some(&nc) = chars.peek() {
|
||||
while let Some(nc) = chars.next() {
|
||||
if matches!(nc, '*' | '_' | '`' | '[') {
|
||||
break;
|
||||
}
|
||||
text.push(chars.next().unwrap());
|
||||
text.push(nc);
|
||||
}
|
||||
inlines.push(Inline::Text(text));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inlines
|
||||
Ok(inlines)
|
||||
}
|
||||
|
||||
fn collect_until<I: Iterator<Item = char>>(
|
||||
chars: &mut std::iter::Peekable<I>,
|
||||
end: char,
|
||||
) -> String {
|
||||
) -> Result<String, MdParseError> {
|
||||
let mut s = String::new();
|
||||
while let Some(&c) = chars.peek() {
|
||||
while let Some(c) = chars.next() {
|
||||
if c == end {
|
||||
chars.next();
|
||||
break;
|
||||
return Ok(s);
|
||||
}
|
||||
s.push(chars.next().unwrap());
|
||||
s.push(c);
|
||||
}
|
||||
s
|
||||
Err(MdParseError::new(end, ""))
|
||||
}
|
||||
|
||||
7
cracked_md/test.md
Normal file
7
cracked_md/test.md
Normal file
@@ -0,0 +1,7 @@
|
||||
# Header *1kkkkkkkkkkkkkkkkkkkkkk*
|
||||
|
||||
this is some code: `abc
|
||||
|
||||
```code
|
||||
|
||||
oiajwefoijao089uaoisdjfoijasdfoijasdofij
|
||||
Reference in New Issue
Block a user