From f7ae47cf84326ada07944ced35181e57db9cc810 Mon Sep 17 00:00:00 2001 From: Phodal Huang Date: Tue, 28 Nov 2023 14:41:22 +0800 Subject: [PATCH] feat(server): init for file walker --- local_server/.gitignore | 3 ++- local_server/Cargo.toml | 1 + local_server/src/doc_split/file_walker.rs | 31 +++++++++++++++++++++++ local_server/src/doc_split/mod.rs | 22 ++++++++++++++++ local_server/src/main.rs | 1 + 5 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 local_server/src/doc_split/file_walker.rs create mode 100644 local_server/src/doc_split/mod.rs diff --git a/local_server/.gitignore b/local_server/.gitignore index 9804392..da3389f 100644 --- a/local_server/.gitignore +++ b/local_server/.gitignore @@ -6,4 +6,5 @@ pkg/ wasm-pack.log .idea !bin/*.rs -*.db \ No newline at end of file +*.db +testdocs diff --git a/local_server/Cargo.toml b/local_server/Cargo.toml index 4c29b37..e741a73 100644 --- a/local_server/Cargo.toml +++ b/local_server/Cargo.toml @@ -10,6 +10,7 @@ actix-web = "4" # core flume = "0.11.0" +ignore = "=0.4.20" anyhow = "1.0.75" diff --git a/local_server/src/doc_split/file_walker.rs b/local_server/src/doc_split/file_walker.rs new file mode 100644 index 0000000..acda822 --- /dev/null +++ b/local_server/src/doc_split/file_walker.rs @@ -0,0 +1,31 @@ +use std::fs::canonicalize; +use std::path::{Path, PathBuf}; + +use tracing::{debug, warn}; + +pub struct FileWalker { + file_list: Vec<PathBuf>, +} + +impl FileWalker { + pub fn index_directory(dir: impl AsRef<Path>) -> Vec<PathBuf> { + let walker = ignore::WalkBuilder::new(&dir) + .standard_filters(true) + .hidden(false) + .build(); + + let file_list = walker + .filter_map(|de| match de { + Ok(de) => Some(de), + Err(err) => { + warn!(%err, "access failure; skipping"); + None + } + }) + .filter(|de| !de.path().strip_prefix(&dir).unwrap().starts_with(".git")) + .filter_map(|de| canonicalize(de.into_path()).ok()) + .collect(); + + file_list + } +} \ No newline at end of file diff --git a/local_server/src/doc_split/mod.rs
b/local_server/src/doc_split/mod.rs new file mode 100644 index 0000000..57bd627 --- /dev/null +++ b/local_server/src/doc_split/mod.rs @@ -0,0 +1,22 @@ +pub mod file_walker; + +use std::path::PathBuf; +fn doc_splitter(filename: &PathBuf) { + println!("doc_splitter: {}", filename.display()); +} + +#[cfg(test)] +mod tests { + use crate::doc_split::file_walker::FileWalker; + use super::*; + + #[test] + fn test_doc_splitter() { + let testdir = PathBuf::from("testdocs"); + let files = FileWalker::index_directory(testdir); + + for file in files { + doc_splitter(&file); + } + } +} \ No newline at end of file diff --git a/local_server/src/main.rs b/local_server/src/main.rs index f2d2fca..a0280ca 100644 --- a/local_server/src/main.rs +++ b/local_server/src/main.rs @@ -10,6 +10,7 @@ pub mod scraper; mod document_handler; pub mod app_state; pub mod infra; +pub mod doc_split; #[actix_web::main] async fn main() -> std::io::Result<()> {