switch to cbor file db

This commit is contained in:
djkato 2024-07-11 19:21:50 +02:00
parent 522b2cad31
commit 7f9072c4a7
5 changed files with 215 additions and 559 deletions

323
Cargo.lock generated
View file

@ -514,15 +514,6 @@ dependencies = [
"generic-array", "generic-array",
] ]
[[package]]
name = "block-buffer"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
dependencies = [
"generic-array",
]
[[package]] [[package]]
name = "blocking" name = "blocking"
version = "1.5.1" version = "1.5.1"
@ -563,16 +554,6 @@ dependencies = [
"syn_derive", "syn_derive",
] ]
[[package]]
name = "bstr"
version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706"
dependencies = [
"memchr",
"serde",
]
[[package]] [[package]]
name = "bumpalo" name = "bumpalo"
version = "3.15.3" version = "3.15.3"
@ -853,7 +834,7 @@ dependencies = [
"hmac", "hmac",
"percent-encoding", "percent-encoding",
"rand 0.8.5", "rand 0.8.5",
"sha2 0.9.9", "sha2",
"time 0.2.27", "time 0.2.27",
"version_check", "version_check",
] ]
@ -898,15 +879,6 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcb25d077389e53838a8158c8e99174c5a9d902dee4904320db714f3c653ffba" checksum = "dcb25d077389e53838a8158c8e99174c5a9d902dee4904320db714f3c653ffba"
[[package]]
name = "crc32fast"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa"
dependencies = [
"cfg-if",
]
[[package]] [[package]]
name = "crossbeam-channel" name = "crossbeam-channel"
version = "0.5.13" version = "0.5.13"
@ -916,25 +888,6 @@ dependencies = [
"crossbeam-utils", "crossbeam-utils",
] ]
[[package]]
name = "crossbeam-deque"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]] [[package]]
name = "crossbeam-utils" name = "crossbeam-utils"
version = "0.8.19" version = "0.8.19"
@ -947,16 +900,6 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
[[package]]
name = "crypto-common"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
dependencies = [
"generic-array",
"typenum",
]
[[package]] [[package]]
name = "crypto-mac" name = "crypto-mac"
version = "0.10.0" version = "0.10.0"
@ -1164,16 +1107,6 @@ dependencies = [
"generic-array", "generic-array",
] ]
[[package]]
name = "digest"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer 0.10.4",
"crypto-common",
]
[[package]] [[package]]
name = "discard" name = "discard"
version = "1.0.4" version = "1.0.4"
@ -1315,27 +1248,6 @@ version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5"
[[package]]
name = "fd-lock"
version = "4.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e5768da2206272c81ef0b5e951a41862938a6070da63bcea197899942d3b947"
dependencies = [
"cfg-if",
"rustix 0.38.31",
"windows-sys 0.52.0",
]
[[package]]
name = "flate2"
version = "1.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]] [[package]]
name = "flume" name = "flume"
version = "0.9.2" version = "0.9.2"
@ -1571,30 +1483,6 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "globset"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57da3b9b5b85bd66f31093f8c408b90a74431672542466497dcbdfdc02034be1"
dependencies = [
"aho-corasick",
"bstr",
"log",
"regex-automata 0.4.6",
"regex-syntax 0.8.2",
]
[[package]]
name = "globwalk"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93e3af942408868f6934a7b85134a3230832b9977cf66125df2f9edcfce4ddcc"
dependencies = [
"bitflags 1.3.2",
"ignore",
"walkdir",
]
[[package]] [[package]]
name = "gloo-net" name = "gloo-net"
version = "0.5.0" version = "0.5.0"
@ -1729,7 +1617,7 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51ab2f639c231793c5f6114bdb9bbe50a7dbbfcd7c7c6bd8475dec2d991e964f" checksum = "51ab2f639c231793c5f6114bdb9bbe50a7dbbfcd7c7c6bd8475dec2d991e964f"
dependencies = [ dependencies = [
"digest 0.9.0", "digest",
"hmac", "hmac",
] ]
@ -1740,7 +1628,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1441c6b1e930e2817404b5046f1f989899143a12bf92de603b69f4e0aee1e15" checksum = "c1441c6b1e930e2817404b5046f1f989899143a12bf92de603b69f4e0aee1e15"
dependencies = [ dependencies = [
"crypto-mac", "crypto-mac",
"digest 0.9.0", "digest",
] ]
[[package]] [[package]]
@ -1946,22 +1834,6 @@ dependencies = [
"unicode-normalization", "unicode-normalization",
] ]
[[package]]
name = "ignore"
version = "0.4.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b46810df39e66e925525d6e38ce1e7f6e1d208f72dc39757880fcb66e2c58af1"
dependencies = [
"crossbeam-deque",
"globset",
"log",
"memchr",
"regex-automata 0.4.6",
"same-file",
"walkdir",
"winapi-util",
]
[[package]] [[package]]
name = "indexmap" name = "indexmap"
version = "2.2.3" version = "2.2.3"
@ -2778,57 +2650,6 @@ version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "pest"
version = "2.7.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56f8023d0fb78c8e03784ea1c7f3fa36e68a723138990b8d5a47d916b651e7a8"
dependencies = [
"memchr",
"thiserror",
"ucd-trie",
]
[[package]]
name = "pest_derive"
version = "2.7.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0d24f72393fd16ab6ac5738bc33cdb6a9aa73f8b902e8fe29cf4e67d7dd1026"
dependencies = [
"pest",
"pest_generator",
]
[[package]]
name = "pest_generator"
version = "2.7.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdc17e2a6c7d0a492f0158d7a4bd66cc17280308bbaff78d5bef566dca35ab80"
dependencies = [
"pest",
"pest_meta",
"proc-macro2",
"quote",
"syn 2.0.48",
]
[[package]]
name = "pest_meta"
version = "2.7.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "934cd7631c050f4674352a6e835d5f6711ffbfb9345c2fc0107155ac495ae293"
dependencies = [
"once_cell",
"pest",
"sha2 0.10.8",
]
[[package]]
name = "pico-args"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315"
[[package]] [[package]]
name = "pin-project" name = "pin-project"
version = "1.1.4" version = "1.1.4"
@ -3051,16 +2872,6 @@ version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae" checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae"
[[package]]
name = "quick-xml"
version = "0.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f24d770aeca0eacb81ac29dfbc55ebcc09312fdd1f8bbecdc7e4a84e000e3b4"
dependencies = [
"memchr",
"serde",
]
[[package]] [[package]]
name = "quote" name = "quote"
version = "1.0.35" version = "1.0.35"
@ -3170,26 +2981,6 @@ dependencies = [
"rand_core 0.5.1", "rand_core 0.5.1",
] ]
[[package]]
name = "rayon"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]] [[package]]
name = "redis" name = "redis"
version = "0.25.3" version = "0.25.3"
@ -3909,24 +3700,13 @@ version = "0.9.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d58a1e1bf39749807d89cf2d98ac2dfa0ff1cb3faa38fbb64dd88ac8013d800" checksum = "4d58a1e1bf39749807d89cf2d98ac2dfa0ff1cb3faa38fbb64dd88ac8013d800"
dependencies = [ dependencies = [
"block-buffer 0.9.0", "block-buffer",
"cfg-if", "cfg-if",
"cpufeatures", "cpufeatures",
"digest 0.9.0", "digest",
"opaque-debug", "opaque-debug",
] ]
[[package]]
name = "sha2"
version = "0.10.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8"
dependencies = [
"cfg-if",
"cpufeatures",
"digest 0.10.7",
]
[[package]] [[package]]
name = "sharded-slab" name = "sharded-slab"
version = "0.1.7" version = "0.1.7"
@ -4002,21 +3782,16 @@ dependencies = [
"cynic-codegen", "cynic-codegen",
"dotenvy", "dotenvy",
"envy", "envy",
"fd-lock",
"flate2",
"pico-args",
"quick-xml",
"rayon",
"rstest", "rstest",
"saleor-app-sdk", "saleor-app-sdk",
"serde", "serde",
"serde_cbor", "serde_cbor",
"serde_json", "serde_json",
"surf", "surf",
"tera",
"thiserror", "thiserror",
"tinytemplate", "tinytemplate",
"tokio", "tokio",
"toml",
"tower", "tower",
"tower-http", "tower-http",
"tracing", "tracing",
@ -4320,22 +4095,6 @@ dependencies = [
"windows-sys 0.52.0", "windows-sys 0.52.0",
] ]
[[package]]
name = "tera"
version = "1.19.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "970dff17c11e884a4a09bc76e3a17ef71e01bb13447a11e85226e254fe6d10b8"
dependencies = [
"globwalk",
"lazy_static",
"pest",
"pest_derive",
"regex",
"serde",
"serde_json",
"unic-segment",
]
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "1.0.61" version = "1.0.61"
@ -4529,21 +4288,21 @@ dependencies = [
[[package]] [[package]]
name = "toml" name = "toml"
version = "0.8.12" version = "0.8.14"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9dd1545e8208b4a5af1aa9bbd0b4cf7e9ea08fabc5d0a5c67fcaafa17433aa3" checksum = "6f49eb2ab21d2f26bd6db7bf383edc527a7ebaee412d17af4d40fdccd442f335"
dependencies = [ dependencies = [
"serde", "serde",
"serde_spanned", "serde_spanned",
"toml_datetime", "toml_datetime",
"toml_edit 0.22.12", "toml_edit 0.22.15",
] ]
[[package]] [[package]]
name = "toml_datetime" name = "toml_datetime"
version = "0.6.5" version = "0.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf"
dependencies = [ dependencies = [
"serde", "serde",
] ]
@ -4561,9 +4320,9 @@ dependencies = [
[[package]] [[package]]
name = "toml_edit" name = "toml_edit"
version = "0.22.12" version = "0.22.15"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3328d4f68a705b2a4498da1d580585d39a6510f98318a2cec3018a7ec61ddef" checksum = "d59a3a72298453f564e2b111fa896f8d07fabb36f51f06d7e875fc5e0b5a3ef1"
dependencies = [ dependencies = [
"indexmap", "indexmap",
"serde", "serde",
@ -4739,62 +4498,6 @@ version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
[[package]]
name = "ucd-trie"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9"
[[package]]
name = "unic-char-property"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221"
dependencies = [
"unic-char-range",
]
[[package]]
name = "unic-char-range"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc"
[[package]]
name = "unic-common"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc"
[[package]]
name = "unic-segment"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4ed5d26be57f84f176157270c112ef57b86debac9cd21daaabbe56db0f88f23"
dependencies = [
"unic-ucd-segment",
]
[[package]]
name = "unic-ucd-segment"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2079c122a62205b421f499da10f3ee0f7697f012f55b675e002483c73ea34700"
dependencies = [
"unic-char-property",
"unic-char-range",
"unic-ucd-version",
]
[[package]]
name = "unic-ucd-version"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4"
dependencies = [
"unic-common",
]
[[package]] [[package]]
name = "unicase" name = "unicase"
version = "2.7.0" version = "2.7.0"

View file

@ -32,15 +32,16 @@ thiserror.workspace = true
rstest.workspace = true rstest.workspace = true
async-std = { workspace = true, features = ["attributes"] } async-std = { workspace = true, features = ["attributes"] }
tera = { version = "1.19.1", default-features = false } toml = "0.8.14"
fd-lock = "4.0.2" # tera = { version = "1.19.1", default-features = false }
quick-xml = { version = "0.34.0", features = ["serialize"] } # fd-lock = "4.0.2"
flate2 = "1.0.28" # quick-xml = { version = "0.34.0", features = ["serialize"] }
# flate2 = "1.0.28"
tinytemplate = "1.2.1" tinytemplate = "1.2.1"
chrono = { version = "0.4.34", features = ["serde"] } chrono = { version = "0.4.34", features = ["serde"] }
serde_cbor = "0.11.2" serde_cbor = "0.11.2"
pico-args = "0.5.0" # pico-args = "0.5.0"
rayon = "1.10.0" # rayon = "1.10.0"
# itertools = "0.13.0" # itertools = "0.13.0"
[build-dependencies] [build-dependencies]

View file

@ -1,21 +1,24 @@
use quick_xml::DeError;
use rayon::prelude::*;
use std::{ use std::{
fs::{self, read_dir, File}, fs::{self, read_dir, File},
io::BufReader, io::{BufReader, ErrorKind},
path::PathBuf, path::PathBuf,
}; };
use tinytemplate::TinyTemplate; use tinytemplate::TinyTemplate;
use crate::{app::SitemapConfig, queries::event_subjects_updated::Event, sitemap::Url}; use crate::{
app::SitemapConfig,
queries::event_subjects_updated::{Event, Product},
sitemap::{ItemType, Url},
};
use tokio::{sync::mpsc::Receiver, task::JoinHandle}; use tokio::{sync::mpsc::Receiver, task::JoinHandle};
use tracing::{debug, error, trace, warn}; use tracing::{debug, error, trace, warn};
use super::{RefType, UrlSet}; use super::UrlSet;
// 10k links google says, but there's also a size limit and my custom params might be messing with // 10k links google says, but there's also a size limit and my custom params might be messing with
// that? Rather split prematurely to be sure. // that? Rather split prematurely to be sure.
const MAX_URL_IN_SET: usize = 6000; const MAX_URL_IN_SET: usize = 6000;
const DB_FILE_NAME: &str = "db.toml";
pub struct EventHandler { pub struct EventHandler {
receiver: Receiver<(Event, SitemapConfig)>, receiver: Receiver<(Event, SitemapConfig)>,
@ -23,107 +26,25 @@ pub struct EventHandler {
impl EventHandler { impl EventHandler {
pub fn start(receiver: Receiver<(Event, SitemapConfig)>) -> JoinHandle<()> { pub fn start(receiver: Receiver<(Event, SitemapConfig)>) -> JoinHandle<()> {
let mut s = Self { receiver }; let s = Self { receiver };
tokio::spawn(s.listen()) tokio::spawn(s.listen())
} }
async fn listen(mut self) { async fn listen(mut self) {
while let Some((message, sitemap_config)) = self.receiver.recv().await { while let Some((message, sitemap_config)) = self.receiver.recv().await {
match message { match message {
Event::ProductCreated(product) => {} Event::ProductCreated(product) => {
Event::ProductUpdated(product) => {
if let Some(product) = product.product { if let Some(product) = product.product {
let mut url_sets = read_xmls(&sitemap_config.target_folder).await; product_update_or_create(product, sitemap_config).await;
let mut was_any_set_affected = false;
//in case no sitemaps exist yet, create first urlset
if url_sets.is_empty() {
let url_set = UrlSet::new();
url_sets.push((
url_set,
std::path::Path::new(&format!(
"{}/0.xml",
sitemap_config.target_folder
))
.to_path_buf(),
));
}
// check if any url_sets contain affected urls
for (set, path) in &mut url_sets {
let mut affected_urls = set.find_urls(product.id.inner());
if affected_urls.len() == 0 {
trace!("Product doesn't exist in url_set {:?}", path);
continue;
}
was_any_set_affected = true;
// Update affected urls
affected_urls.iter_mut().for_each(|url| {
let mut templater = TinyTemplate::new();
templater
.add_template("product", &sitemap_config.product_template)
.expect("Check your url templates!");
let new_loc = templater
.render("product", &product)
.expect("Check your url templates!");
debug!("updated `{}` to `{}`", &url.loc, new_loc);
url.loc = new_loc;
});
}
//create product url if no set contained url with it
if !was_any_set_affected {
debug!("Product isn't in any sitemap, creating...");
if let Some((last_url_set, _)) = url_sets.last_mut() {
if product.category.is_none() {
debug!("product missing category, hopefully not needed in url template?");
}
last_url_set.url.push(Url::new_with_ref(
product.id.inner().to_owned(),
product.slug,
RefType::Product,
product.category.clone().map(|c| c.id.inner().to_owned()),
product.category.clone().map(|c| c.slug),
Some(RefType::Category),
));
}
}
let mut split_url_sets = vec![];
//write first time, if some throw too long error, split and try in second
//loop
for url_set in url_sets {
if let Err(e) = write_urlset_to_file(&url_set).await {
match e {
WriteUrlSetToFileErr::UrlSetTooLong(l) => {
debug!("url set too large ({l}), splitting...");
if let Some(mut new_url_sets) =
split_urlset_to_new_file(url_set).await
{
split_url_sets.append(&mut new_url_sets);
}
}
e => error!("{:?}", e),
}
};
}
//the second attempt
for url_set in split_url_sets {
if let Err(e) = write_urlset_to_file(&url_set).await {
match e {
WriteUrlSetToFileErr::UrlSetTooLong(l) => {
error!("url set STILL too large?? ({l}), ignoring url set {:?}...", url_set);
}
e => error!("{:?}", e),
}
};
}
} }
warn!("Event::ProductCreated missing product"); warn!("Event::ProductCreated missing product");
} }
Event::ProductUpdated(product) => {
if let Some(product) = product.product {
product_update_or_create(product, sitemap_config).await;
}
warn!("Event::ProductUpdated missing product");
}
Event::ProductDeleted(product) => {} Event::ProductDeleted(product) => {}
Event::CategoryCreated(category) => {} Event::CategoryCreated(category) => {}
Event::CategoryUpdated(category) => {} Event::CategoryUpdated(category) => {}
@ -140,89 +61,107 @@ impl EventHandler {
} }
} }
async fn read_xmls(target_folder: &str) -> Vec<(UrlSet, PathBuf)> { async fn product_delete(product: Product, sitemap_config: SitemapConfig) {
let paths = read_dir(target_folder).unwrap(); let mut url_set = match get_from_file(&sitemap_config.target_folder).await {
let all_urls: Vec<(UrlSet, PathBuf)> = paths Ok(u) => u,
.into_iter() Err(e) => match e {
.par_bridge() UrlSetFileOperationsErr::IoResult(e) => match e.kind() {
.filter_map(|path| { ErrorKind::NotFound => UrlSet::new(),
if let Ok(path) = path { _ => {
if path.path().is_file() { error!("File errror: {:?}\n won't crash, but probably broken.", e);
let file = File::open(path.path()).expect("Unable to open file"); return;
let reader = BufReader::new(file);
return Some((quick_xml::de::from_reader(reader).unwrap(), path.path()));
} }
},
UrlSetFileOperationsErr::DeError(e) => {
error!(
"DE error: {:?}\n Won't crash, but something went badly wrong",
e
);
return;
} }
return None; },
}) };
.collect();
all_urls
}
/** url_set.flush_related(product.id.inner());
* fails `if url_set.url.len() > MAX_URL_IN_SET`
*/
async fn split_urlset_to_new_file(union: (UrlSet, PathBuf)) -> Option<Vec<(UrlSet, PathBuf)>> {
let (url_set, path) = union;
if url_set.url.len() < MAX_URL_IN_SET { write_to_file(&url_set, &sitemap_config.target_folder)
return None; .await
}
let mut was_original_file_assigned = false;
let chunks = url_set.url.chunks(MAX_URL_IN_SET).collect::<Vec<_>>();
let mut file_number = path
.file_stem()
.unwrap()
.to_str()
.unwrap()
.parse::<i32>()
.unwrap(); .unwrap();
return Some(
chunks
.into_iter()
.map(|urls| {
let folder = path.clone().parent().unwrap().to_str().unwrap().to_owned();
//keep incrementing file number till a file with that number is free to use
if !was_original_file_assigned {
was_original_file_assigned = true
} else {
while !std::path::Path::new(&format!("{folder}/{file_number}.xml")).exists() {
file_number = file_number + 1;
}
}
let mut url_set = UrlSet::new();
url_set.url = urls.into();
(
url_set,
std::path::Path::new(&format!("{folder}/{file_number}.xml")).to_path_buf(),
)
})
.collect::<Vec<_>>(),
);
} }
async fn write_urlset_to_file( async fn product_update_or_create(product: Product, sitemap_config: SitemapConfig) {
url_set_n_path: &(UrlSet, PathBuf), let mut url_set = match get_from_file(&sitemap_config.target_folder).await {
) -> Result<(), WriteUrlSetToFileErr> { Ok(u) => u,
let (url_set, path) = url_set_n_path; Err(e) => match e {
if url_set.url.len() > MAX_URL_IN_SET { UrlSetFileOperationsErr::IoResult(e) => match e.kind() {
return Err(WriteUrlSetToFileErr::UrlSetTooLong(url_set.url.len())); ErrorKind::NotFound => UrlSet::new(),
_ => {
error!("File errror: {:?}\n won't crash, but probably broken.", e);
return;
}
},
UrlSetFileOperationsErr::DeError(e) => {
error!(
"DE error: {:?}\n Won't crash, but something went badly wrong",
e
);
return;
}
},
};
let mut affected_urls = url_set.find_affected(product.id.inner(), &product.slug);
debug!("affected urls: {:?}", &affected_urls);
if affected_urls.len() == 0 {
trace!("Product doesn't exist in url_set yet");
url_set.push(Url::new_product(&sitemap_config.product_template, product).unwrap());
} else {
// Update affected urls
affected_urls.iter_mut().for_each(|url| {
let mut templater = TinyTemplate::new();
templater
.add_template("product", &sitemap_config.product_template)
.expect("Check your url templates!");
let new_loc = templater
.render("product", &product)
.expect("Check your url templates!");
debug!("updated `{}` to `{}`", &url.url, new_loc);
url.url = new_loc;
});
} }
fs::write(path, &quick_xml::se::to_string(&url_set)?)?; write_to_file(&url_set, &sitemap_config.target_folder)
.await
.unwrap();
}
async fn get_from_file(target_folder: &str) -> Result<UrlSet, UrlSetFileOperationsErr> {
let urls: UrlSet =
serde_cbor::de::from_slice(&std::fs::read(format!("{target_folder}/{DB_FILE_NAME}"))?)?;
Ok(urls)
}
async fn write_to_file(
url_set: &UrlSet,
target_folder: &str,
) -> Result<(), UrlSetFileOperationsErr> {
if url_set.len() > MAX_URL_IN_SET {
// return Err(UrlSetFileOperationsErr::UrlSetTooLong(url_set.len()));
warn!("Urlset exeeded {MAX_URL_IN_SET} links, search engines might start to complain!");
}
fs::write(
format!("{target_folder}/{DB_FILE_NAME}"),
&serde_cbor::to_vec(url_set)?,
)?;
Ok(()) Ok(())
} }
#[derive(thiserror::Error, Debug)] #[derive(thiserror::Error, Debug)]
pub enum WriteUrlSetToFileErr { pub enum UrlSetFileOperationsErr {
#[error("writing error")] #[error("writing error")]
IoResult(#[from] std::io::Error), IoResult(#[from] std::io::Error),
#[error("Url set length exeeds xml standard of 10k entries per file")] // #[error("Url set length exeeds xml standard of 10k entries per file")]
UrlSetTooLong(usize), // UrlSetTooLong(usize),
#[error("{0}")] #[error("{0}")]
DeError(#[from] DeError), DeError(#[from] serde_cbor::Error),
} }

View file

@ -11,10 +11,7 @@ use tinytemplate::TinyTemplate;
use crate::{ use crate::{
app::SitemapConfig, app::SitemapConfig,
queries::{ queries::event_subjects_updated::{Category, Collection, Page, Product, ProductUpdated},
event_subjects_updated::{Category, Collection, Page, Product, ProductUpdated},
get_all_categories_n_products::Product,
},
}; };
const SITEMAP_XMLNS: &str = "http://sitemaps.org/schemas/sitemap/0.9"; const SITEMAP_XMLNS: &str = "http://sitemaps.org/schemas/sitemap/0.9";
@ -52,6 +49,27 @@ impl UrlSet {
pub fn new() -> Self { pub fn new() -> Self {
Self { urls: vec![] } Self { urls: vec![] }
} }
pub fn flush_related(&mut self, id: &str) {
self.retain(|u| u.data.id != id && u.related.as_ref().map_or(true, |ud| ud.id != id));
}
pub fn find_related(&mut self, id: &str) -> Vec<&mut Url> {
self.iter_mut()
.filter(|u| u.data.id == id || u.related.as_ref().map_or(false, |ud| ud.id == id))
.collect()
}
pub fn find_affected(&mut self, id: &str, slug: &str) -> Vec<&mut Url> {
self.iter_mut()
.filter(|u| {
u.data.id == id && u.data.slug != slug
|| u.related
.as_ref()
.map_or(false, |ud| ud.id == id && ud.slug != slug)
})
.collect()
}
} }
impl Deref for UrlSet { impl Deref for UrlSet {
@ -68,10 +86,7 @@ impl DerefMut for UrlSet {
} }
impl Url { impl Url {
pub fn new_product( pub fn new_product(template: &str, product: Product) -> Result<Self, NewUrlError> {
sitemap_config: &SitemapConfig,
product: Product,
) -> Result<Self, NewUrlError> {
let category = product let category = product
.category .category
.as_ref() .as_ref()
@ -91,16 +106,13 @@ impl Url {
let mut tt = TinyTemplate::new(); let mut tt = TinyTemplate::new();
tt.add_template("t", &sitemap_config.product_template); tt.add_template("t", template)?;
let url = tt.render("t", &product)?; let url = tt.render("t", &product)?;
Ok(Self { url, data, related }) Ok(Self { url, data, related })
} }
pub fn new_category( pub fn new_category(template: &str, category: Category) -> Result<Self, NewUrlError> {
sitemap_config: &SitemapConfig,
category: Category,
) -> Result<Self, NewUrlError> {
let data = ItemData { let data = ItemData {
id: category.id.inner().to_owned(), id: category.id.inner().to_owned(),
slug: category.slug.clone(), slug: category.slug.clone(),
@ -108,7 +120,7 @@ impl Url {
}; };
let mut tt = TinyTemplate::new(); let mut tt = TinyTemplate::new();
tt.add_template("t", &sitemap_config.category_template); tt.add_template("t", template)?;
let url = tt.render("t", &category)?; let url = tt.render("t", &category)?;
Ok(Self { Ok(Self {
@ -118,10 +130,7 @@ impl Url {
}) })
} }
pub fn new_collection( pub fn new_collection(template: &str, collection: Collection) -> Result<Self, NewUrlError> {
sitemap_config: &SitemapConfig,
collection: Collection,
) -> Result<Self, NewUrlError> {
let data = ItemData { let data = ItemData {
id: collection.id.inner().to_owned(), id: collection.id.inner().to_owned(),
slug: collection.slug.clone(), slug: collection.slug.clone(),
@ -129,7 +138,7 @@ impl Url {
}; };
let mut tt = TinyTemplate::new(); let mut tt = TinyTemplate::new();
tt.add_template("t", &sitemap_config.collection_template); tt.add_template("t", template);
let url = tt.render("t", &collection)?; let url = tt.render("t", &collection)?;
Ok(Self { Ok(Self {
@ -139,7 +148,7 @@ impl Url {
}) })
} }
pub fn new_page(sitemap_config: &SitemapConfig, page: Page) -> Result<Self, NewUrlError> { pub fn new_page(template: &str, page: Page) -> Result<Self, NewUrlError> {
let data = ItemData { let data = ItemData {
id: page.id.inner().to_owned(), id: page.id.inner().to_owned(),
slug: page.slug.clone(), slug: page.slug.clone(),
@ -147,7 +156,7 @@ impl Url {
}; };
let mut tt = TinyTemplate::new(); let mut tt = TinyTemplate::new();
tt.add_template("t", &sitemap_config.pages_template); tt.add_template("t", template);
let url = tt.render("t", &page)?; let url = tt.render("t", &page)?;
Ok(Self { Ok(Self {

View file

@ -2,7 +2,7 @@ use crate::{
app::{trace_to_std, SitemapConfig}, app::{trace_to_std, SitemapConfig},
create_app, create_app,
queries::event_subjects_updated::{Category, Product, ProductUpdated}, queries::event_subjects_updated::{Category, Product, ProductUpdated},
sitemap::{RefType, Url, UrlSet}, sitemap::{Url, UrlSet},
}; };
use axum::{ use axum::{
body::Body, body::Body,
@ -79,12 +79,17 @@ async fn index_returns_ok() {
#[rstest] #[rstest]
async fn updates_xml_from_product() { async fn updates_xml_from_product() {
let mut app = init_test_app().await; let mut app = init_test_app().await;
// let app = app.ready().await.unwrap();
let product_id = cynic::Id::new("product1".to_owned()); let product_updated = ProductUpdated {
let product_slug = "product1slug".to_owned(); product: Some(Product {
let category_id = cynic::Id::new("category1".to_owned()); id: cynic::Id::new("product1".to_owned()),
let category_slug = "category1slug".to_owned(); slug: "product1slug".to_owned(),
category: Some(Category {
slug: "category1slug".to_owned(),
id: cynic::Id::new("category1".to_owned()),
}),
}),
};
let response = app let response = app
.ready() .ready()
@ -94,17 +99,7 @@ async fn updates_xml_from_product() {
Request::builder() Request::builder()
.uri("/api/webhooks") .uri("/api/webhooks")
.body(Body::from( .body(Body::from(
serde_json::to_string_pretty(&ProductUpdated { serde_json::to_string_pretty(&product_updated).unwrap(),
product: Some(Product {
id: product_id.clone(),
slug: product_slug.clone(),
category: Some(Category {
slug: category_slug.clone(),
id: category_id.clone(),
}),
}),
})
.unwrap(),
)) ))
.unwrap(), .unwrap(),
) )
@ -117,14 +112,11 @@ async fn updates_xml_from_product() {
serde_json::from_str(&std::fs::read_to_string("./temp/sitemaps/1.xml").unwrap()).unwrap(); serde_json::from_str(&std::fs::read_to_string("./temp/sitemaps/1.xml").unwrap()).unwrap();
let mut webhook_url_set = UrlSet::new(); let mut webhook_url_set = UrlSet::new();
webhook_url_set.url = vec![Url::new_with_ref( webhook_url_set.urls = vec![Url::new_product(
product_id.inner().to_owned(), "https://example.com/{product.category.slug}/{product.slug}",
product_slug.clone(), product_updated.product.unwrap(),
RefType::Product, )
Some(category_id.inner().to_owned()), .unwrap()];
Some(category_slug.clone()),
Some(RefType::Category),
)];
assert_eq!(xml, webhook_url_set); assert_eq!(xml, webhook_url_set);
} }
@ -132,38 +124,50 @@ async fn updates_xml_from_product() {
#[rstest] #[rstest]
fn urlset_serialisation_isnt_lossy() { fn urlset_serialisation_isnt_lossy() {
std::env::set_var("APP_API_BASE_URL", "http://localhost:3000"); std::env::set_var("APP_API_BASE_URL", "http://localhost:3000");
let sitemap_config = SitemapConfig {
target_folder: "./temp/sitemaps".to_string(),
pages_template: "https://example.com/{page.slug}".to_string(),
index_hostname: "https://example.com".to_string(),
product_template: "https://example.com/{product.category.slug}/{product.slug}".to_string(),
category_template: "https://example.com/{category.slug}".to_string(),
collection_template: "https://example.com/collection/{collection.slug}".to_string(),
};
init_tracing(); init_tracing();
let product1 = Product {
id: cynic::Id::new("product1".to_owned()),
slug: "product1slug".to_owned(),
category: Some(Category {
slug: "category1slug".to_owned(),
id: cynic::Id::new("category1".to_owned()),
}),
};
let product2 = Product {
id: cynic::Id::new("product2".to_owned()),
slug: "product2slug".to_owned(),
category: Some(Category {
slug: "category2slug".to_owned(),
id: cynic::Id::new("category2".to_owned()),
}),
};
let mut url_set = UrlSet::new(); let mut url_set = UrlSet::new();
url_set.url.append(&mut vec![ url_set.urls = vec![
Url::new( Url::new_category(
"category1coolid".to_string(), &sitemap_config.category_template,
"category1".to_string(), product1.category.clone().unwrap(),
RefType::Category, )
), .unwrap(),
Url::new( Url::new_product(&sitemap_config.product_template, product1).unwrap(),
"Collection1coolid".to_string(), Url::new_category(
"Collection1".to_string(), &sitemap_config.category_template,
RefType::Collection, product2.category.clone().unwrap(),
), )
Url::new_with_ref( .unwrap(),
"category1coolid".to_string(), Url::new_product(&sitemap_config.product_template, product2).unwrap(),
"category1".to_string(), ];
RefType::Product, let file_str = serde_cbor::to_vec(&url_set).unwrap();
Some("product1coolid".to_string()), let deserialized_url_set: UrlSet = serde_cbor::de::from_slice(&file_str).unwrap();
Some("product1".to_string()),
Some(RefType::Category),
),
Url::new_with_ref(
"category2coolid".to_string(),
"category2".to_string(),
RefType::Product,
Some("product2coolid".to_string()),
Some("product2".to_string()),
Some(RefType::Category),
),
]);
let file_str = quick_xml::se::to_string(&url_set).unwrap();
dbg!(&file_str);
let deserialized_url_set: UrlSet = quick_xml::de::from_str(&file_str).unwrap();
assert_eq!(url_set, deserialized_url_set); assert_eq!(url_set, deserialized_url_set);
} }