From 7f9072c4a724cb7a53e635756c136470006cc36c Mon Sep 17 00:00:00 2001 From: djkato Date: Thu, 11 Jul 2024 19:21:50 +0200 Subject: [PATCH] switch to cbor file db --- Cargo.lock | 323 +----------------- sitemap-generator/Cargo.toml | 13 +- .../src/sitemap/event_handler.rs | 271 ++++++--------- sitemap-generator/src/sitemap/mod.rs | 51 +-- sitemap-generator/src/tests/mod.rs | 116 ++++--- 5 files changed, 215 insertions(+), 559 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4f38c9c..804432f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -514,15 +514,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - [[package]] name = "blocking" version = "1.5.1" @@ -563,16 +554,6 @@ dependencies = [ "syn_derive", ] -[[package]] -name = "bstr" -version = "1.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" -dependencies = [ - "memchr", - "serde", -] - [[package]] name = "bumpalo" version = "3.15.3" @@ -853,7 +834,7 @@ dependencies = [ "hmac", "percent-encoding", "rand 0.8.5", - "sha2 0.9.9", + "sha2", "time 0.2.27", "version_check", ] @@ -898,15 +879,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcb25d077389e53838a8158c8e99174c5a9d902dee4904320db714f3c653ffba" -[[package]] -name = "crc32fast" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" -dependencies = [ - "cfg-if", -] - [[package]] name = "crossbeam-channel" version = "0.5.13" @@ -916,25 +888,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "crossbeam-deque" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" -dependencies = [ - "crossbeam-utils", -] - [[package]] name = "crossbeam-utils" version = "0.8.19" @@ -947,16 +900,6 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - [[package]] name = "crypto-mac" version = "0.10.0" @@ -1164,16 +1107,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer 0.10.4", - "crypto-common", -] - [[package]] name = "discard" version = "1.0.4" @@ -1315,27 +1248,6 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" -[[package]] -name = "fd-lock" -version = "4.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e5768da2206272c81ef0b5e951a41862938a6070da63bcea197899942d3b947" -dependencies = [ - "cfg-if", - "rustix 0.38.31", - "windows-sys 0.52.0", -] - -[[package]] -name = "flate2" -version = "1.0.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" -dependencies = [ - "crc32fast", - "miniz_oxide", -] - [[package]] name = "flume" version = "0.9.2" @@ -1571,30 +1483,6 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" -[[package]] -name = "globset" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57da3b9b5b85bd66f31093f8c408b90a74431672542466497dcbdfdc02034be1" -dependencies = [ - "aho-corasick", - "bstr", - "log", - "regex-automata 0.4.6", - "regex-syntax 0.8.2", -] - -[[package]] -name = "globwalk" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93e3af942408868f6934a7b85134a3230832b9977cf66125df2f9edcfce4ddcc" -dependencies = [ - "bitflags 1.3.2", - "ignore", - "walkdir", -] - [[package]] name = "gloo-net" version = "0.5.0" @@ -1729,7 +1617,7 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51ab2f639c231793c5f6114bdb9bbe50a7dbbfcd7c7c6bd8475dec2d991e964f" dependencies = [ - "digest 0.9.0", + "digest", "hmac", ] @@ -1740,7 +1628,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1441c6b1e930e2817404b5046f1f989899143a12bf92de603b69f4e0aee1e15" dependencies = [ "crypto-mac", - "digest 0.9.0", + "digest", ] [[package]] @@ -1946,22 +1834,6 @@ dependencies = [ "unicode-normalization", ] -[[package]] -name = "ignore" -version = "0.4.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b46810df39e66e925525d6e38ce1e7f6e1d208f72dc39757880fcb66e2c58af1" -dependencies = [ - "crossbeam-deque", - "globset", - "log", - "memchr", - "regex-automata 0.4.6", - "same-file", - "walkdir", - "winapi-util", -] - [[package]] name = "indexmap" version = "2.2.3" @@ -2778,57 +2650,6 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" -[[package]] -name = "pest" -version = "2.7.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f8023d0fb78c8e03784ea1c7f3fa36e68a723138990b8d5a47d916b651e7a8" -dependencies = [ - "memchr", - "thiserror", - "ucd-trie", -] - -[[package]] -name = "pest_derive" -version = "2.7.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0d24f72393fd16ab6ac5738bc33cdb6a9aa73f8b902e8fe29cf4e67d7dd1026" -dependencies = [ - "pest", - "pest_generator", -] - -[[package]] -name = "pest_generator" -version = "2.7.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdc17e2a6c7d0a492f0158d7a4bd66cc17280308bbaff78d5bef566dca35ab80" -dependencies = [ - "pest", - "pest_meta", - "proc-macro2", - "quote", - "syn 2.0.48", -] - -[[package]] -name = "pest_meta" -version = "2.7.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "934cd7631c050f4674352a6e835d5f6711ffbfb9345c2fc0107155ac495ae293" -dependencies = [ - "once_cell", - "pest", - "sha2 0.10.8", -] - -[[package]] -name = "pico-args" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315" - [[package]] name = "pin-project" version = "1.1.4" @@ -3051,16 +2872,6 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae" -[[package]] -name = "quick-xml" -version = "0.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f24d770aeca0eacb81ac29dfbc55ebcc09312fdd1f8bbecdc7e4a84e000e3b4" -dependencies = [ - "memchr", - "serde", -] - [[package]] name = "quote" version = "1.0.35" @@ -3170,26 +2981,6 @@ dependencies = [ "rand_core 0.5.1", ] -[[package]] -name = "rayon" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - [[package]] name = "redis" version = "0.25.3" @@ -3909,24 +3700,13 @@ version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4d58a1e1bf39749807d89cf2d98ac2dfa0ff1cb3faa38fbb64dd88ac8013d800" dependencies = [ - "block-buffer 0.9.0", + "block-buffer", "cfg-if", "cpufeatures", - "digest 0.9.0", + "digest", "opaque-debug", ] -[[package]] -name = "sha2" -version = "0.10.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest 0.10.7", -] - [[package]] name = "sharded-slab" version = "0.1.7" @@ -4002,21 +3782,16 @@ dependencies = [ "cynic-codegen", "dotenvy", "envy", - "fd-lock", - "flate2", - "pico-args", - "quick-xml", - "rayon", "rstest", "saleor-app-sdk", "serde", "serde_cbor", "serde_json", "surf", - "tera", "thiserror", "tinytemplate", "tokio", + "toml", "tower", "tower-http", "tracing", @@ -4320,22 +4095,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "tera" -version = "1.19.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "970dff17c11e884a4a09bc76e3a17ef71e01bb13447a11e85226e254fe6d10b8" -dependencies = [ - "globwalk", - "lazy_static", - "pest", - "pest_derive", - "regex", - "serde", - "serde_json", - "unic-segment", -] - [[package]] name = "thiserror" version = "1.0.61" @@ -4529,21 +4288,21 @@ dependencies = [ [[package]] name = "toml" -version = "0.8.12" +version = "0.8.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9dd1545e8208b4a5af1aa9bbd0b4cf7e9ea08fabc5d0a5c67fcaafa17433aa3" +checksum = "6f49eb2ab21d2f26bd6db7bf383edc527a7ebaee412d17af4d40fdccd442f335" dependencies = [ "serde", "serde_spanned", "toml_datetime", - "toml_edit 0.22.12", + "toml_edit 0.22.15", ] [[package]] name = "toml_datetime" -version = "0.6.5" +version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" +checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf" dependencies = [ "serde", ] @@ -4561,9 +4320,9 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.22.12" +version = "0.22.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3328d4f68a705b2a4498da1d580585d39a6510f98318a2cec3018a7ec61ddef" +checksum = "d59a3a72298453f564e2b111fa896f8d07fabb36f51f06d7e875fc5e0b5a3ef1" dependencies = [ "indexmap", "serde", @@ -4739,62 +4498,6 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" -[[package]] -name = "ucd-trie" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" - -[[package]] -name = "unic-char-property" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221" -dependencies = [ - "unic-char-range", -] - -[[package]] -name = "unic-char-range" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc" - -[[package]] -name = "unic-common" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc" - -[[package]] -name = "unic-segment" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4ed5d26be57f84f176157270c112ef57b86debac9cd21daaabbe56db0f88f23" -dependencies = [ - "unic-ucd-segment", -] - -[[package]] -name = "unic-ucd-segment" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2079c122a62205b421f499da10f3ee0f7697f012f55b675e002483c73ea34700" -dependencies = [ - "unic-char-property", - "unic-char-range", - "unic-ucd-version", -] - -[[package]] -name = "unic-ucd-version" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4" -dependencies = [ - "unic-common", -] - [[package]] name = "unicase" version = "2.7.0" diff --git a/sitemap-generator/Cargo.toml b/sitemap-generator/Cargo.toml index 9e5790c..0128f3f 100644 --- a/sitemap-generator/Cargo.toml +++ b/sitemap-generator/Cargo.toml @@ -32,15 +32,16 @@ thiserror.workspace = true rstest.workspace = true async-std = { workspace = true, features = ["attributes"] } -tera = { version = "1.19.1", default-features = false } -fd-lock = "4.0.2" -quick-xml = { version = "0.34.0", features = ["serialize"] } -flate2 = "1.0.28" +toml = "0.8.14" +# tera = { version = "1.19.1", default-features = false } +# fd-lock = "4.0.2" +# quick-xml = { version = "0.34.0", features = ["serialize"] } +# flate2 = "1.0.28" tinytemplate = "1.2.1" chrono = { version = "0.4.34", features = ["serde"] } serde_cbor = "0.11.2" -pico-args = "0.5.0" -rayon = "1.10.0" +# pico-args = "0.5.0" +# rayon = "1.10.0" # itertools = "0.13.0" [build-dependencies] diff --git a/sitemap-generator/src/sitemap/event_handler.rs b/sitemap-generator/src/sitemap/event_handler.rs index 43e79e2..0566377 100644 --- a/sitemap-generator/src/sitemap/event_handler.rs +++ b/sitemap-generator/src/sitemap/event_handler.rs @@ -1,21 +1,24 @@ -use quick_xml::DeError; -use rayon::prelude::*; use std::{ fs::{self, read_dir, File}, - io::BufReader, + io::{BufReader, ErrorKind}, path::PathBuf, }; use tinytemplate::TinyTemplate; -use crate::{app::SitemapConfig, queries::event_subjects_updated::Event, sitemap::Url}; +use crate::{ + app::SitemapConfig, + queries::event_subjects_updated::{Event, Product}, + sitemap::{ItemType, Url}, +}; use tokio::{sync::mpsc::Receiver, task::JoinHandle}; use tracing::{debug, error, trace, warn}; -use super::{RefType, UrlSet}; +use super::UrlSet; // 10k links google says, but there's also a size limit and my custom params might be messing with // that? Rather split prematurely to be sure. const MAX_URL_IN_SET: usize = 6000; +const DB_FILE_NAME: &str = "db.toml"; pub struct EventHandler { receiver: Receiver<(Event, SitemapConfig)>, @@ -23,107 +26,25 @@ pub struct EventHandler { impl EventHandler { pub fn start(receiver: Receiver<(Event, SitemapConfig)>) -> JoinHandle<()> { - let mut s = Self { receiver }; + let s = Self { receiver }; tokio::spawn(s.listen()) } async fn listen(mut self) { while let Some((message, sitemap_config)) = self.receiver.recv().await { match message { - Event::ProductCreated(product) => {} - Event::ProductUpdated(product) => { + Event::ProductCreated(product) => { if let Some(product) = product.product { - let mut url_sets = read_xmls(&sitemap_config.target_folder).await; - let mut was_any_set_affected = false; - - //in case no sitemaps exist yet, create first urlset - if url_sets.is_empty() { - let url_set = UrlSet::new(); - url_sets.push(( - url_set, - std::path::Path::new(&format!( - "{}/0.xml", - sitemap_config.target_folder - )) - .to_path_buf(), - )); - } - - // check if any url_sets contain affected urls - for (set, path) in &mut url_sets { - let mut affected_urls = set.find_urls(product.id.inner()); - - if affected_urls.len() == 0 { - trace!("Product doesn't exist in url_set {:?}", path); - continue; - } - was_any_set_affected = true; - - // Update affected urls - affected_urls.iter_mut().for_each(|url| { - let mut templater = TinyTemplate::new(); - templater - .add_template("product", &sitemap_config.product_template) - .expect("Check your url templates!"); - let new_loc = templater - .render("product", &product) - .expect("Check your url templates!"); - debug!("updated `{}` to `{}`", &url.loc, new_loc); - url.loc = new_loc; - }); - } - - //create product url if no set contained url with it - if !was_any_set_affected { - debug!("Product isn't in any sitemap, creating..."); - if let Some((last_url_set, _)) = url_sets.last_mut() { - if product.category.is_none() { - debug!("product missing category, hopefully not needed in url template?"); - } - last_url_set.url.push(Url::new_with_ref( - product.id.inner().to_owned(), - product.slug, - RefType::Product, - product.category.clone().map(|c| c.id.inner().to_owned()), - product.category.clone().map(|c| c.slug), - Some(RefType::Category), - )); - } - } - - let mut split_url_sets = vec![]; - //write first time, if some throw too long error, split and try in second - //loop - for url_set in url_sets { - if let Err(e) = write_urlset_to_file(&url_set).await { - match e { - WriteUrlSetToFileErr::UrlSetTooLong(l) => { - debug!("url set too large ({l}), splitting..."); - if let Some(mut new_url_sets) = - split_urlset_to_new_file(url_set).await - { - split_url_sets.append(&mut new_url_sets); - } - } - e => error!("{:?}", e), - } - }; - } - - //the second attempt - for url_set in split_url_sets { - if let Err(e) = write_urlset_to_file(&url_set).await { - match e { - WriteUrlSetToFileErr::UrlSetTooLong(l) => { - error!("url set STILL too large?? ({l}), ignoring url set {:?}...", url_set); - } - e => error!("{:?}", e), - } - }; - } + product_update_or_create(product, sitemap_config).await; } warn!("Event::ProductCreated missing product"); } + Event::ProductUpdated(product) => { + if let Some(product) = product.product { + product_update_or_create(product, sitemap_config).await; + } + warn!("Event::ProductUpdated missing product"); + } Event::ProductDeleted(product) => {} Event::CategoryCreated(category) => {} Event::CategoryUpdated(category) => {} @@ -140,89 +61,107 @@ impl EventHandler { } } -async fn read_xmls(target_folder: &str) -> Vec<(UrlSet, PathBuf)> { - let paths = read_dir(target_folder).unwrap(); - let all_urls: Vec<(UrlSet, PathBuf)> = paths - .into_iter() - .par_bridge() - .filter_map(|path| { - if let Ok(path) = path { - if path.path().is_file() { - let file = File::open(path.path()).expect("Unable to open file"); - let reader = BufReader::new(file); - return Some((quick_xml::de::from_reader(reader).unwrap(), path.path())); +async fn product_delete(product: Product, sitemap_config: SitemapConfig) { + let mut url_set = match get_from_file(&sitemap_config.target_folder).await { + Ok(u) => u, + Err(e) => match e { + UrlSetFileOperationsErr::IoResult(e) => match e.kind() { + ErrorKind::NotFound => UrlSet::new(), + _ => { + error!("File errror: {:?}\n won't crash, but probably broken.", e); + return; } + }, + UrlSetFileOperationsErr::DeError(e) => { + error!( + "DE error: {:?}\n Won't crash, but something went badly wrong", + e + ); + return; } - return None; - }) - .collect(); - all_urls -} + }, + }; -/** -* fails `if url_set.url.len() > MAX_URL_IN_SET` -*/ -async fn split_urlset_to_new_file(union: (UrlSet, PathBuf)) -> Option> { - let (url_set, path) = union; + url_set.flush_related(product.id.inner()); - if url_set.url.len() < MAX_URL_IN_SET { - return None; - } - - let mut was_original_file_assigned = false; - let chunks = url_set.url.chunks(MAX_URL_IN_SET).collect::>(); - - let mut file_number = path - .file_stem() - .unwrap() - .to_str() - .unwrap() - .parse::() + write_to_file(&url_set, &sitemap_config.target_folder) + .await .unwrap(); - - return Some( - chunks - .into_iter() - .map(|urls| { - let folder = path.clone().parent().unwrap().to_str().unwrap().to_owned(); - - //keep incrementing file number till a file with that number is free to use - if !was_original_file_assigned { - was_original_file_assigned = true - } else { - while !std::path::Path::new(&format!("{folder}/{file_number}.xml")).exists() { - file_number = file_number + 1; - } - } - - let mut url_set = UrlSet::new(); - url_set.url = urls.into(); - ( - url_set, - std::path::Path::new(&format!("{folder}/{file_number}.xml")).to_path_buf(), - ) - }) - .collect::>(), - ); } -async fn write_urlset_to_file( - url_set_n_path: &(UrlSet, PathBuf), -) -> Result<(), WriteUrlSetToFileErr> { - let (url_set, path) = url_set_n_path; - if url_set.url.len() > MAX_URL_IN_SET { - return Err(WriteUrlSetToFileErr::UrlSetTooLong(url_set.url.len())); +async fn product_update_or_create(product: Product, sitemap_config: SitemapConfig) { + let mut url_set = match get_from_file(&sitemap_config.target_folder).await { + Ok(u) => u, + Err(e) => match e { + UrlSetFileOperationsErr::IoResult(e) => match e.kind() { + ErrorKind::NotFound => UrlSet::new(), + _ => { + error!("File errror: {:?}\n won't crash, but probably broken.", e); + return; + } + }, + UrlSetFileOperationsErr::DeError(e) => { + error!( + "DE error: {:?}\n Won't crash, but something went badly wrong", + e + ); + return; + } + }, + }; + + let mut affected_urls = url_set.find_affected(product.id.inner(), &product.slug); + debug!("affected urls: {:?}", &affected_urls); + + if affected_urls.len() == 0 { + trace!("Product doesn't exist in url_set yet"); + url_set.push(Url::new_product(&sitemap_config.product_template, product).unwrap()); + } else { + // Update affected urls + affected_urls.iter_mut().for_each(|url| { + let mut templater = TinyTemplate::new(); + templater + .add_template("product", &sitemap_config.product_template) + .expect("Check your url templates!"); + let new_loc = templater + .render("product", &product) + .expect("Check your url templates!"); + debug!("updated `{}` to `{}`", &url.url, new_loc); + url.url = new_loc; + }); } - fs::write(path, &quick_xml::se::to_string(&url_set)?)?; + write_to_file(&url_set, &sitemap_config.target_folder) + .await + .unwrap(); +} + +async fn get_from_file(target_folder: &str) -> Result { + let urls: UrlSet = + serde_cbor::de::from_slice(&std::fs::read(format!("{target_folder}/{DB_FILE_NAME}"))?)?; + Ok(urls) +} + +async fn write_to_file( + url_set: &UrlSet, + target_folder: &str, +) -> Result<(), UrlSetFileOperationsErr> { + if url_set.len() > MAX_URL_IN_SET { + // return Err(UrlSetFileOperationsErr::UrlSetTooLong(url_set.len())); + warn!("Urlset exeeded {MAX_URL_IN_SET} links, search engines might start to complain!"); + } + fs::write( + format!("{target_folder}/{DB_FILE_NAME}"), + &serde_cbor::to_vec(url_set)?, + )?; Ok(()) } #[derive(thiserror::Error, Debug)] -pub enum WriteUrlSetToFileErr { +pub enum UrlSetFileOperationsErr { #[error("writing error")] IoResult(#[from] std::io::Error), - #[error("Url set length exeeds xml standard of 10k entries per file")] - UrlSetTooLong(usize), + // #[error("Url set length exeeds xml standard of 10k entries per file")] + // UrlSetTooLong(usize), #[error("{0}")] - DeError(#[from] DeError), + DeError(#[from] serde_cbor::Error), } diff --git a/sitemap-generator/src/sitemap/mod.rs b/sitemap-generator/src/sitemap/mod.rs index 3673e14..b202c92 100644 --- a/sitemap-generator/src/sitemap/mod.rs +++ b/sitemap-generator/src/sitemap/mod.rs @@ -11,10 +11,7 @@ use tinytemplate::TinyTemplate; use crate::{ app::SitemapConfig, - queries::{ - event_subjects_updated::{Category, Collection, Page, Product, ProductUpdated}, - get_all_categories_n_products::Product, - }, + queries::event_subjects_updated::{Category, Collection, Page, Product, ProductUpdated}, }; const SITEMAP_XMLNS: &str = "http://sitemaps.org/schemas/sitemap/0.9"; @@ -52,6 +49,27 @@ impl UrlSet { pub fn new() -> Self { Self { urls: vec![] } } + + pub fn flush_related(&mut self, id: &str) { + self.retain(|u| u.data.id != id && u.related.as_ref().map_or(true, |ud| ud.id != id)); + } + + pub fn find_related(&mut self, id: &str) -> Vec<&mut Url> { + self.iter_mut() + .filter(|u| u.data.id == id || u.related.as_ref().map_or(false, |ud| ud.id == id)) + .collect() + } + + pub fn find_affected(&mut self, id: &str, slug: &str) -> Vec<&mut Url> { + self.iter_mut() + .filter(|u| { + u.data.id == id && u.data.slug != slug + || u.related + .as_ref() + .map_or(false, |ud| ud.id == id && ud.slug != slug) + }) + .collect() + } } impl Deref for UrlSet { @@ -68,10 +86,7 @@ impl DerefMut for UrlSet { } impl Url { - pub fn new_product( - sitemap_config: &SitemapConfig, - product: Product, - ) -> Result { + pub fn new_product(template: &str, product: Product) -> Result { let category = product .category .as_ref() @@ -91,16 +106,13 @@ impl Url { let mut tt = TinyTemplate::new(); - tt.add_template("t", &sitemap_config.product_template); + tt.add_template("t", template)?; let url = tt.render("t", &product)?; Ok(Self { url, data, related }) } - pub fn new_category( - sitemap_config: &SitemapConfig, - category: Category, - ) -> Result { + pub fn new_category(template: &str, category: Category) -> Result { let data = ItemData { id: category.id.inner().to_owned(), slug: category.slug.clone(), @@ -108,7 +120,7 @@ impl Url { }; let mut tt = TinyTemplate::new(); - tt.add_template("t", &sitemap_config.category_template); + tt.add_template("t", template)?; let url = tt.render("t", &category)?; Ok(Self { @@ -118,10 +130,7 @@ impl Url { }) } - pub fn new_collection( - sitemap_config: &SitemapConfig, - collection: Collection, - ) -> Result { + pub fn new_collection(template: &str, collection: Collection) -> Result { let data = ItemData { id: collection.id.inner().to_owned(), slug: collection.slug.clone(), @@ -129,7 +138,7 @@ impl Url { }; let mut tt = TinyTemplate::new(); - tt.add_template("t", &sitemap_config.collection_template); + tt.add_template("t", template); let url = tt.render("t", &collection)?; Ok(Self { @@ -139,7 +148,7 @@ impl Url { }) } - pub fn new_page(sitemap_config: &SitemapConfig, page: Page) -> Result { + pub fn new_page(template: &str, page: Page) -> Result { let data = ItemData { id: page.id.inner().to_owned(), slug: page.slug.clone(), @@ -147,7 +156,7 @@ impl Url { }; let mut tt = TinyTemplate::new(); - tt.add_template("t", &sitemap_config.pages_template); + tt.add_template("t", template); let url = tt.render("t", &page)?; Ok(Self { diff --git a/sitemap-generator/src/tests/mod.rs b/sitemap-generator/src/tests/mod.rs index ccbfe58..548975b 100644 --- a/sitemap-generator/src/tests/mod.rs +++ b/sitemap-generator/src/tests/mod.rs @@ -2,7 +2,7 @@ use crate::{ app::{trace_to_std, SitemapConfig}, create_app, queries::event_subjects_updated::{Category, Product, ProductUpdated}, - sitemap::{RefType, Url, UrlSet}, + sitemap::{Url, UrlSet}, }; use axum::{ body::Body, @@ -79,12 +79,17 @@ async fn index_returns_ok() { #[rstest] async fn updates_xml_from_product() { let mut app = init_test_app().await; - // let app = app.ready().await.unwrap(); - let product_id = cynic::Id::new("product1".to_owned()); - let product_slug = "product1slug".to_owned(); - let category_id = cynic::Id::new("category1".to_owned()); - let category_slug = "category1slug".to_owned(); + let product_updated = ProductUpdated { + product: Some(Product { + id: cynic::Id::new("product1".to_owned()), + slug: "product1slug".to_owned(), + category: Some(Category { + slug: "category1slug".to_owned(), + id: cynic::Id::new("category1".to_owned()), + }), + }), + }; let response = app .ready() @@ -94,17 +99,7 @@ async fn updates_xml_from_product() { Request::builder() .uri("/api/webhooks") .body(Body::from( - serde_json::to_string_pretty(&ProductUpdated { - product: Some(Product { - id: product_id.clone(), - slug: product_slug.clone(), - category: Some(Category { - slug: category_slug.clone(), - id: category_id.clone(), - }), - }), - }) - .unwrap(), + serde_json::to_string_pretty(&product_updated).unwrap(), )) .unwrap(), ) @@ -117,14 +112,11 @@ async fn updates_xml_from_product() { serde_json::from_str(&std::fs::read_to_string("./temp/sitemaps/1.xml").unwrap()).unwrap(); let mut webhook_url_set = UrlSet::new(); - webhook_url_set.url = vec![Url::new_with_ref( - product_id.inner().to_owned(), - product_slug.clone(), - RefType::Product, - Some(category_id.inner().to_owned()), - Some(category_slug.clone()), - Some(RefType::Category), - )]; + webhook_url_set.urls = vec![Url::new_product( + "https://example.com/{product.category.slug}/{product.slug}", + product_updated.product.unwrap(), + ) + .unwrap()]; assert_eq!(xml, webhook_url_set); } @@ -132,38 +124,50 @@ async fn updates_xml_from_product() { #[rstest] fn urlset_serialisation_isnt_lossy() { std::env::set_var("APP_API_BASE_URL", "http://localhost:3000"); + let sitemap_config = SitemapConfig { + target_folder: "./temp/sitemaps".to_string(), + pages_template: "https://example.com/{page.slug}".to_string(), + index_hostname: "https://example.com".to_string(), + product_template: "https://example.com/{product.category.slug}/{product.slug}".to_string(), + category_template: "https://example.com/{category.slug}".to_string(), + collection_template: "https://example.com/collection/{collection.slug}".to_string(), + }; + init_tracing(); + let product1 = Product { + id: cynic::Id::new("product1".to_owned()), + slug: "product1slug".to_owned(), + category: Some(Category { + slug: "category1slug".to_owned(), + id: cynic::Id::new("category1".to_owned()), + }), + }; + + let product2 = Product { + id: cynic::Id::new("product2".to_owned()), + slug: "product2slug".to_owned(), + category: Some(Category { + slug: "category2slug".to_owned(), + id: cynic::Id::new("category2".to_owned()), + }), + }; + let mut url_set = UrlSet::new(); - url_set.url.append(&mut vec![ - Url::new( - "category1coolid".to_string(), - "category1".to_string(), - RefType::Category, - ), - Url::new( - "Collection1coolid".to_string(), - "Collection1".to_string(), - RefType::Collection, - ), - Url::new_with_ref( - "category1coolid".to_string(), - "category1".to_string(), - RefType::Product, - Some("product1coolid".to_string()), - Some("product1".to_string()), - Some(RefType::Category), - ), - Url::new_with_ref( - "category2coolid".to_string(), - "category2".to_string(), - RefType::Product, - Some("product2coolid".to_string()), - Some("product2".to_string()), - Some(RefType::Category), - ), - ]); - let file_str = quick_xml::se::to_string(&url_set).unwrap(); - dbg!(&file_str); - let deserialized_url_set: UrlSet = quick_xml::de::from_str(&file_str).unwrap(); + url_set.urls = vec![ + Url::new_category( + &sitemap_config.category_template, + product1.category.clone().unwrap(), + ) + .unwrap(), + Url::new_product(&sitemap_config.product_template, product1).unwrap(), + Url::new_category( + &sitemap_config.category_template, + product2.category.clone().unwrap(), + ) + .unwrap(), + Url::new_product(&sitemap_config.product_template, product2).unwrap(), + ]; + let file_str = serde_cbor::to_vec(&url_set).unwrap(); + let deserialized_url_set: UrlSet = serde_cbor::de::from_slice(&file_str).unwrap(); assert_eq!(url_set, deserialized_url_set); }