From 2e04cbee23414236404d404db41c22a51b077907 Mon Sep 17 00:00:00 2001 From: "Gustavo \"Guz\" L. de Mello" Date: Tue, 26 Mar 2024 15:13:33 -0300 Subject: [PATCH] feat: link parsing and basic cli setup --- .gitignore | 1 + Cargo.lock | 108 ++++++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + src/lib.rs | 2 + src/links.rs | 143 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 109 ++++++++++++++++++++++++++++++++++++++- src/utils.rs | 38 ++++++++++++++ 7 files changed, 400 insertions(+), 2 deletions(-) create mode 100644 src/lib.rs create mode 100644 src/links.rs create mode 100644 src/utils.rs diff --git a/.gitignore b/.gitignore index 9cee32c..dacef26 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /target .vault-backup/ +test.md .direnv diff --git a/Cargo.lock b/Cargo.lock index 852cc5c..afaf788 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -166,6 +166,21 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" +[[package]] +name = "clio" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7fc6734af48458f72f5a3fa7b840903606427d98a710256e808f76a965047d9" +dependencies = [ + "cfg-if", + "clap", + "is-terminal", + "libc", + "tempfile", + "walkdir", + "windows-sys 0.42.0", +] + [[package]] name = "colorchoice" version = "1.0.0" @@ -314,6 +329,12 @@ dependencies = [ "regex", ] +[[package]] +name = "fastrand" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" + [[package]] name = "flate2" version = "1.0.28" @@ -342,6 +363,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + [[package]] name = "ident_case" version = "1.0.1" @@ -358,6 +385,17 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "is-terminal" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "itoa" version = "1.0.10" @@ -402,6 +440,7 @@ name = "mdparser" version = "0.1.0" dependencies = [ "clap", + "clio", "comrak", "serde", "serde_json", @@ -690,6 +729,18 @@ dependencies = [ "yaml-rust", ] +[[package]] +name = "tempfile" +version = "3.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +dependencies = [ + "cfg-if", + "fastrand", + "rustix", + "windows-sys 0.52.0", +] + [[package]] name = "terminal_size" version = "0.3.0" @@ -876,6 +927,21 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-sys" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -924,6 +990,12 @@ dependencies = [ "windows_x86_64_msvc 0.52.4", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -936,6 +1008,12 @@ version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -948,6 +1026,12 @@ version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -960,6 +1044,12 @@ version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -972,6 +1062,12 @@ version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -984,6 +1080,12 @@ version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -996,6 +1098,12 @@ version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" diff --git a/Cargo.toml b/Cargo.toml index fb50b48..e8de811 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" [dependencies] clap = "4.5.3" +clio = { version = "0.3.5", features = ["clap-parse"] } comrak = "0.21.0" serde = { version = "1.0.197", features = ["derive"] } serde_json = "1.0.114" diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..e04f5bb --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,2 @@ +pub mod links; +pub mod utils; diff --git a/src/links.rs b/src/links.rs new file mode 100644 index 0000000..296f3e3 --- /dev/null +++ b/src/links.rs @@ -0,0 +1,143 @@ +use std::io::Write; +use std::{cell::RefCell, collections::HashMap}; +use std::{fs, io, path::PathBuf}; + +use comrak::nodes::{Ast, NodeLink, NodeValue}; +use comrak::{arena_tree::Node, Arena}; + +pub struct ParseOptions { + pub alias_prop: Option, + pub path_root: PathBuf, + pub to_complete_paths: bool, + pub remove_unalised: bool, + pub remove_unfound: bool, +} +impl Default for ParseOptions { + fn default() -> Self { + ParseOptions { + alias_prop: None, + path_root: PathBuf::new(), + to_complete_paths: true, + remove_unalised: true, + remove_unfound: true, + } + } +} + +#[derive(Debug)] +pub enum ParsingError { + AliasNotFound { file: String }, + AliasErr(GetAliasError), + IoErr(io::Error), +} +pub fn parse<'a>( + node: &'a Node<'a, RefCell>, + link: &mut NodeLink, + opts: &ParseOptions, +) -> Result<(), ParsingError> { + let path = match find_path(link, &opts.path_root) { + Ok(p) => p, + Err(err) => { + if opts.remove_unfound { + node.children().for_each(|n| node.insert_before(n)); + node.detach(); + return Ok(()); + } else { + return Err(ParsingError::IoErr(err)); + } + } + }; + + if opts.to_complete_paths { + link.url = String::from(path.to_string_lossy()) + } + + if let Some(a) = &opts.alias_prop { + let alias = match get_alias(&path, &a) { + Ok(a) => a, + Err(err) => return Err(ParsingError::AliasErr(err)), + }; + if let Some(v) = alias { + link.url = v; + } else if opts.remove_unalised { + node.children().for_each(|n| node.insert_before(n)); + node.detach(); + } else { + return Err(ParsingError::AliasNotFound { + file: link.url.clone(), + }); + } + } + Ok(()) +} + +#[derive(Debug)] +pub enum GetAliasError { + IoErr(io::Error), + YamlErr(serde_yaml::Error), +} +pub fn get_alias(path: &PathBuf, alias_prop: &String) -> Result, GetAliasError> { + let file = match fs::read_to_string(path) { + Ok(f) => f, + Err(err) => return Err(GetAliasError::IoErr(err)), + }; + + let arena = Arena::new(); + let ast = comrak::parse_document(&arena, &file, &crate::utils::default_options()); + + let alias = crate::utils::iter_nodes_r(ast, &|node| { + if let NodeValue::FrontMatter(f) = &node.data.borrow().value { + // Removes starting and trailing "---" delimiters from frontmatter's string. + let f = String::from(f.split("---").collect::>()[1]); + + let map = match serde_yaml::from_str::>(&f) { + Ok(m) => m, + Err(err) => { + return Some(Err::(GetAliasError::YamlErr(err))); + } + }; + + match map.get(alias_prop) { + // The hashmap will be dropped anyways and free up space, + // so whatever + Some(r) => Some(Ok(r.clone())), + None => None, + } + } else { + None + } + }); + + match alias { + Some(r) => match r { + Ok(s) => Ok(Some(s)), + Err(err) => Err(err), + }, + None => Ok(None), + } +} + +pub fn find_path(link: &NodeLink, path_root: &PathBuf) -> Result { + find_file(&link.url, &path_root) +} + +fn find_file(file: &String, path: &PathBuf) -> Result { + match fs::read_dir(path)?.find_map(|e| match e { + Ok(e) => { + if e.path().is_dir() { + match find_file(&file, &e.path()) { + Ok(r) => Some(r), + Err(_) => None, + } + } else if file == &e.file_name().to_string_lossy().to_string() { + Some(e.path()) + } else { + None + } + } + _ => None, + }) { + Some(r) => Ok(r), + None => Err(io::Error::new(io::ErrorKind::NotFound, "File not found")), + } +} diff --git a/src/main.rs b/src/main.rs index e7a11a9..f80ba6b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,108 @@ -fn main() { - println!("Hello, world!"); +use clap::{Parser, Subcommand}; +use clio::*; +use comrak::nodes::NodeValue; + +use mdparser::{links, utils}; + +#[derive(Parser, Debug)] +#[command(version = "0.1", about = "", long_about = None, propagate_version = true)] +struct Cli { + #[command(subcommand)] + command: Commands, + + #[arg(short, long, default_value = "-")] + input: Input, + + #[arg(short, long, default_value = "-")] + output: Output, + + #[arg(long)] + surpress_errors: bool, +} + +#[derive(Debug, Subcommand)] +enum Commands { + Links { + #[arg(short, long)] + path_root: clio::ClioPath, + + #[arg(short, long, default_value = "x_alias_url")] + alias_prop: String, + + #[arg(long)] + to_absolute_paths: bool, + + #[arg(long)] + not_remove_unalised: bool, + + #[arg(long)] + not_remove_unfound: bool, + }, + Not {}, +} + +fn main() { + let mut cli = Cli::parse(); + + let file = std::io::read_to_string(&mut cli.input).unwrap_or_else(|err| panic!("{err:#?}")); + let arena = comrak::Arena::new(); + let ast = comrak::parse_document(&arena, &file, &mdparser::utils::default_options()); + + // println!("{ast:#?}"); + + match &cli.command { + Commands::Links { + path_root, + alias_prop, + to_absolute_paths, + not_remove_unalised, + not_remove_unfound, + } => utils::iter_nodes(&ast, &|node| { + if let NodeValue::Link(ref mut link) = &mut node.data.borrow_mut().value { + match links::parse( + node, + link, + &links::ParseOptions { + path_root: path_root.to_path_buf(), + alias_prop: Some(String::from(alias_prop)), + to_complete_paths: *to_absolute_paths, + remove_unalised: !*not_remove_unalised, + remove_unfound: !*not_remove_unfound, + }, + ) { + Ok(_) => (), + Err(err) => { + if !&cli.surpress_errors { + panic!("{err:#?}\n"); + } else { + eprint!("{err:#?}\n"); + } + } + }; + + /* + if link.url.starts_with("https://") || link.url.starts_with("http://") { + return; + } + + let file = match mdparser::links::find_path(link, &vault_root.to_path_buf()) { + Ok(f) => f, + Err(_) => { + if !not_remove_unfound { + node.children().for_each(|t| node.insert_before(t)); + node.detach(); + } + return; + } + }; + + let n = node.parent(); + println!("{n:#?}"); + */ + } + }), + _ => (), + }; + + let _ = comrak::format_commonmark(&ast, &mdparser::utils::default_options(), &mut cli.output); } diff --git a/src/utils.rs b/src/utils.rs new file mode 100644 index 0000000..3d8cccb --- /dev/null +++ b/src/utils.rs @@ -0,0 +1,38 @@ +pub fn default_options() -> comrak::Options { + let mut opts = comrak::Options::default(); + + opts.render.width = 100; + opts.render.hardbreaks = false; + + opts.extension.strikethrough = true; + opts.extension.front_matter_delimiter = Some("---".to_owned()); + + opts +} + +pub fn iter_nodes<'a, F>(node: &'a comrak::nodes::AstNode<'a>, f: &F) +where + F: Fn(&'a comrak::nodes::AstNode<'a>), +{ + f(node); + for c in node.children() { + iter_nodes(c, f) + } +} + +pub fn iter_nodes_r<'a, F, T>(node: &'a comrak::nodes::AstNode<'a>, f: &F) -> Option +where + F: Fn(&'a comrak::nodes::AstNode<'a>) -> Option, +{ + let result = f(node); + if let Some(r) = result { + return Some(r); + } + for c in node.children() { + let result = iter_nodes_r(c, f); + if let Some(r) = result { + return Some(r); + } + } + None +}