ditch xml, let's just use sitemap.txt

This commit is contained in:
djkato 2024-07-10 23:51:59 +02:00
parent e471bf8adb
commit 303c122877
11 changed files with 448 additions and 193 deletions

15
.neoconf.json Normal file
View file

@ -0,0 +1,15 @@
{
"lspconfig": {
"rust_analyzer": {
"rust-analyzer.cargo.features": "all"
/*
use this only when working with leptos
*/
// "rust-analyzer.rustfmt.overrideCommand": [
// "leptosfmt",
// "--stdin",
// "--rustfmt"
// ]
}
}
}

View file

@ -1,12 +0,0 @@
{
"lspconfig": {
"rust_analyzer": {
"rust-analyzer.cargo.features": "all",
"rust-analyzer.rustfmt.overrideCommand": [
"leptosfmt",
"--stdin",
"--rustfmt"
]
}
}
}

80
Cargo.lock generated
View file

@ -154,6 +154,16 @@ version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eab1c04a571841102f5345a8fc0f6bb3d31c315dec879b5c6e42e40ce7ffa34e"
[[package]]
name = "async-attributes"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3203e79f4dd9bdda415ed03cf14dae5a2bf775c683a00f94e9cd1faf0f596e5"
dependencies = [
"quote",
"syn 1.0.109",
]
[[package]]
name = "async-channel"
version = "1.9.0"
@ -283,6 +293,7 @@ version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62565bb4402e926b29953c785397c6dc0391b7b446e45008b0049eb43cec6f5d"
dependencies = [
"async-attributes",
"async-channel 1.9.0",
"async-global-executor",
"async-io 1.13.0",
@ -1471,6 +1482,12 @@ version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004"
[[package]]
name = "futures-timer"
version = "3.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24"
[[package]]
name = "futures-util"
version = "0.3.30"
@ -1548,6 +1565,12 @@ version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
[[package]]
name = "glob"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "globset"
version = "0.4.14"
@ -3264,6 +3287,12 @@ version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
[[package]]
name = "relative-path"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2"
[[package]]
name = "rend"
version = "0.4.2"
@ -3359,6 +3388,36 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "rstest"
version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9afd55a67069d6e434a95161415f5beeada95a01c7b815508a82dcb0e1593682"
dependencies = [
"futures",
"futures-timer",
"rstest_macros",
"rustc_version 0.4.0",
]
[[package]]
name = "rstest_macros"
version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4165dfae59a39dd41d8dec720d3cbfbc71f69744efb480a3920f5d4e0cc6798d"
dependencies = [
"cfg-if",
"glob",
"proc-macro-crate",
"proc-macro2",
"quote",
"regex",
"relative-path",
"rustc_version 0.4.0",
"syn 2.0.48",
"unicode-ident",
]
[[package]]
name = "rstml"
version = "0.11.2"
@ -3407,7 +3466,16 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
dependencies = [
"semver",
"semver 0.9.0",
]
[[package]]
name = "rustc_version"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366"
dependencies = [
"semver 1.0.23",
]
[[package]]
@ -3615,6 +3683,12 @@ dependencies = [
"semver-parser",
]
[[package]]
name = "semver"
version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b"
[[package]]
name = "semver-parser"
version = "0.7.0"
@ -3921,6 +3995,7 @@ name = "sitemap-generator"
version = "1.0.0"
dependencies = [
"anyhow",
"async-std",
"axum",
"chrono",
"cynic",
@ -3932,6 +4007,7 @@ dependencies = [
"pico-args",
"quick-xml",
"rayon",
"rstest",
"saleor-app-sdk",
"serde",
"serde_cbor",
@ -4041,7 +4117,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d022496b16281348b52d0e30ae99e01a73d737b2f45d38fed4edf79f9325a1d5"
dependencies = [
"discard",
"rustc_version",
"rustc_version 0.2.3",
"stdweb-derive",
"stdweb-internal-macros",
"stdweb-internal-runtime",

View file

@ -42,6 +42,8 @@ leptos_axum = { version = "0.6" }
leptos_meta = { version = "0.6", features = ["nightly"] }
leptos_router = { version = "0.6", features = ["nightly"] }
web-sys = "0.3.69"
rstest = "0.21.0"
async-std = { version = "1.5", features = ["attributes"] }
# Defines a size-optimized profile for the WASM bundle in release mode
[profile.wasm-release]

View file

@ -22,13 +22,16 @@ tracing-serde.workspace = true
tracing-subscriber.workspace = true
dotenvy.workspace = true
axum.workspace = true
saleor-app-sdk.workspace = true
saleor-app-sdk = { workspace = true, features = ["file_apl"] }
tower = { workspace = true, features = ["util"] }
tower-http = { workspace = true, features = ["fs", "trace"] }
surf.workspace = true
cynic = { workspace = true, features = ["http-surf"] }
cynic-codegen.workspace = true
thiserror.workspace = true
rstest.workspace = true
async-std = { workspace = true, features = ["attributes"] }
tera = { version = "1.19.1", default-features = false }
fd-lock = "4.0.2"
quick-xml = { version = "0.34.0", features = ["serialize"] }

View file

@ -10,8 +10,11 @@ mod app;
mod queries;
mod routes;
mod sitemap;
mod test;
#[cfg(debug_assertions)]
mod tests;
use axum::Router;
use saleor_app_sdk::{
config::Config,
manifest::{cargo_info, AppManifestBuilder, AppPermission},
@ -35,13 +38,31 @@ use crate::{
};
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let config = Config::load()?;
trace_to_std(&config)?;
let sitemap_config = SitemapConfig::load()?;
async fn main() {
debug!("Creating configs...");
let config = Config::load().unwrap();
trace_to_std(&config).unwrap();
let sitemap_config = SitemapConfig::load().unwrap();
let saleor_app = SaleorApp::new(&config)?;
let app = create_app(&config, sitemap_config).await;
let listener = tokio::net::TcpListener::bind(
"0.0.0.0:".to_owned()
+ config
.app_api_base_url
.split(':')
.collect::<Vec<_>>()
.get(2)
.unwrap_or(&"3000"),
)
.await
.unwrap();
info!("listening on {}", listener.local_addr().unwrap());
axum::serve(listener, app).await.unwrap();
}
async fn create_app(config: &Config, sitemap_config: SitemapConfig) -> Router {
let saleor_app = SaleorApp::new(&config).unwrap();
debug!("Creating saleor App...");
let app_manifest = AppManifestBuilder::new(&config, cargo_info!())
@ -83,27 +104,11 @@ async fn main() -> anyhow::Result<()> {
Ok(v) => v,
Err(e) => {
error!("Missing channel slug, Saleor will soon deprecate product queries without channel specified.");
anyhow::bail!(e);
"".to_string()
}
},
saleor_app: Arc::new(Mutex::new(saleor_app)),
};
debug!("Created AppState...");
let app = create_routes(app_state);
let listener = tokio::net::TcpListener::bind(
"0.0.0.0:".to_owned()
+ config
.app_api_base_url
.split(':')
.collect::<Vec<_>>()
.get(2)
.unwrap_or(&"3000"),
)
.await?;
info!("listening on {}", listener.local_addr()?);
match axum::serve(listener, app).await {
Ok(o) => Ok(o),
Err(e) => anyhow::bail!(e),
}
create_routes(app_state)
}

View file

@ -1,11 +1,15 @@
use axum::{
handler::HandlerWithoutStateExt,
http::StatusCode,
middleware,
routing::{any, get, post},
Router,
};
#[cfg(not(debug_assertions))]
use axum::middleware;
#[cfg(not(debug_assertions))]
use saleor_app_sdk::middleware::verify_webhook_signature::webhook_signature_verifier;
use tower_http::services::ServeDir;
use crate::app::AppState;
@ -22,14 +26,20 @@ pub fn create_routes(state: AppState) -> Router {
(StatusCode::NOT_FOUND, "Not found")
}
let service = handle_404.into_service();
//TODO : Fix this relative path issue in workspaces
#[cfg(not(debug_assertions))]
let serve_dir = ServeDir::new("./public").not_found_service(service);
#[cfg(debug_assertions)]
let serve_dir = ServeDir::new("./sitemap-generator/public").not_found_service(service);
//TODO: Query for everything using the app auth token
//TODO: "Failed fetching initial products: More than one channel exists, please spocify which one"
Router::new()
.route("/api/webhooks", any(webhooks))
.layer(middleware::from_fn(webhook_signature_verifier))
let r = Router::new().route("/api/webhooks", any(webhooks));
#[cfg(not(debug_assertions))]
r.layer(middleware::from_fn(webhook_signature_verifier));
r
//handles just path, eg. localhost:3000/
.route(
"/",

View file

@ -4,9 +4,18 @@ mod event_handler;
mod page;
mod product;
use chrono::{DateTime, FixedOffset, SubsecRound};
use quick_xml::DeError;
use std::ops::{Deref, DerefMut};
use serde::{Deserialize, Serialize};
use tinytemplate::TinyTemplate;
use crate::{
app::SitemapConfig,
queries::{
event_subjects_updated::{Category, Collection, Page, Product, ProductUpdated},
get_all_categories_n_products::Product,
},
};
const SITEMAP_XMLNS: &str = "http://sitemaps.org/schemas/sitemap/0.9";
const SALEOR_REF_XMLNS: &str = "http://app-sitemap-generator.kremik.sk/xml-schemas/saleor-ref.xsd";
@ -14,136 +23,145 @@ const SALEOR_REF_XMLNS: &str = "http://app-sitemap-generator.kremik.sk/xml-schem
#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)]
#[serde(rename = "urlset")]
pub struct UrlSet {
#[serde(rename = "@xmlns:saleor")]
xmlns_saleor: String,
#[serde(rename = "@xmlns")]
xmlns: String,
pub url: Vec<Url>,
pub urls: Vec<Url>,
}
#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)]
pub struct Url {
pub loc: String,
pub lastmod: DateTime<FixedOffset>,
#[serde(rename = "saleor:ref")]
pub saleor_ref: SaleorRef,
pub url: String,
pub data: ItemData,
pub related: Option<ItemData>,
}
#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)]
pub enum RefType {
/// Identifying data for a single item referenced by a sitemap entry.
pub struct ItemData {
/// Identifier of the item; the `Url::new_*` constructors fill it from the source object's `id.inner()`.
pub id: String,
/// Slug of the item, copied from the source object's `slug`.
pub slug: String,
/// Which kind of item this entry describes.
pub typ: ItemType,
}
/// Kind of item an [`ItemData`] refers to.
#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)]
pub enum ItemType {
Product,
Category,
Collection,
Page,
}
#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)]
pub struct SaleorRef {
#[serde(rename = "saleor:id")]
pub id: String,
#[serde(rename = "saleor:type")]
pub typ: RefType,
/**
Related items come first in url, if present. eg:
site.com/{page} : typ = RefType::Page
site.com/{category}/{product} : typ= Product, related_typ: Category
*/
#[serde(rename = "saleor:related-id")]
#[serde(skip_serializing_if = "Option::is_none")]
pub related_id: Option<String>,
/**
Related items come first in url, if present. eg:
site.com/{page} : typ = RefType::Page
site.com/{category}/{product} : typ= Product, related_typ: Category
*/
#[serde(rename = "saleor:related-typ")]
#[serde(skip_serializing_if = "Option::is_none")]
pub related_typ: Option<RefType>,
impl UrlSet {
    /// Creates a `UrlSet` that does not contain any URLs yet.
    pub fn new() -> Self {
        let urls = Vec::new();
        Self { urls }
    }
}
impl UrlSet {
/**
Includes the XML version header
*/
pub fn to_file(&self) -> Result<String, DeError> {
let init = quick_xml::se::to_string(self)?;
Ok(r#"<?xml version="1.0" encoding="UTF-8"?>"#.to_string() + "\n" + &init)
}
/**
adds static xmlns default strings
*/
pub fn new() -> Self {
let mut base_url = std::env::var("APP_API_BASE_URL").unwrap();
//Cuz apparently xml url thingy isn't actually an url so you can't https? Gosh I hate xml
if base_url.contains("https") {
base_url = base_url.replacen("https", "http", 1);
}
//Trailing / in url would mess stuff up
if base_url.chars().last().unwrap() == '/' {
base_url.pop();
}
let xmlns_saleor = format!("{base_url}/schemas/saleor-ref.xsd",);
Self {
xmlns: SITEMAP_XMLNS.to_string(),
xmlns_saleor,
url: vec![],
}
/// Exposes the inner `urls` vector so a `UrlSet` can be read like a `Vec<Url>`.
impl Deref for UrlSet {
type Target = Vec<Url>;
/// Returns a shared reference to the `urls` field.
fn deref(&self) -> &Self::Target {
&self.urls
}
}
pub fn find_urls(&mut self, id: &str) -> Vec<&mut Url> {
self.url
.iter_mut()
.filter(|url| {
url.saleor_ref.id == id || url.saleor_ref.related_id == Some(id.to_owned())
})
.collect()
/// Exposes the inner `urls` vector mutably so `Vec` mutators can be called directly on a `UrlSet`.
impl DerefMut for UrlSet {
/// Returns a mutable reference to the `urls` field.
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.urls
}
}
impl Url {
pub fn new(id: String, slug: String, typ: RefType) -> Self {
Self {
saleor_ref: SaleorRef {
id,
typ,
related_id: None,
related_typ: None,
},
lastmod: chrono::offset::Utc::now().fixed_offset().round_subsecs(1),
// Have template string determine the url
loc: format!("https://example.com/{slug}"),
}
/// Builds the sitemap entry for a product together with its category.
///
/// The URL is produced by rendering `sitemap_config.product_template` with
/// `product` as the template context. The product becomes `data` and its
/// category becomes `related`.
///
/// # Errors
/// - [`NewUrlError::MissingData`] when `product.category` is `None`.
/// - [`NewUrlError::BadTemplating`] when the template cannot be compiled or rendered.
pub fn new_product(
    sitemap_config: &SitemapConfig,
    product: Product,
) -> Result<Self, NewUrlError> {
    // Borrow the category; only the id and slug are copied out below.
    let category = product
        .category
        .as_ref()
        .ok_or(NewUrlError::MissingData)?;
    let data = ItemData {
        id: product.id.inner().to_owned(),
        slug: product.slug.clone(),
        typ: ItemType::Product,
    };
    let related = Some(ItemData {
        id: category.id.inner().to_owned(),
        slug: category.slug.clone(),
        typ: ItemType::Category,
    });
    let mut tt = TinyTemplate::new();
    // Propagate template compile errors instead of dropping them; otherwise the
    // failure only shows up later as a misleading render error.
    tt.add_template("t", &sitemap_config.product_template)?;
    let url = tt.render("t", &product)?;
    Ok(Self { url, data, related })
}
/**
For example: product/category, product/collection
*/
pub fn new_with_ref(
id: String,
slug: String,
typ: RefType,
related_id: Option<String>,
related_slug: Option<String>,
related_typ: Option<RefType>,
) -> Self {
let loc = match related_slug {
Some(r_s) => {
format!("https://example.com/{r_s}/{slug}")
}
None => {
format!("https://example.com/{slug}")
}
pub fn new_category(
sitemap_config: &SitemapConfig,
category: Category,
) -> Result<Self, NewUrlError> {
let data = ItemData {
id: category.id.inner().to_owned(),
slug: category.slug.clone(),
typ: ItemType::Category,
};
Self {
saleor_ref: SaleorRef {
id,
typ,
related_id,
related_typ,
},
lastmod: chrono::offset::Utc::now().fixed_offset().round_subsecs(1),
// Have template string determine the url
loc,
}
let mut tt = TinyTemplate::new();
tt.add_template("t", &sitemap_config.category_template);
let url = tt.render("t", &category)?;
Ok(Self {
url,
data,
related: None,
})
}
/// Builds the sitemap entry for a collection.
///
/// The URL is produced by rendering `sitemap_config.collection_template` with
/// `collection` as the template context. No related item is attached.
///
/// # Errors
/// [`NewUrlError::BadTemplating`] when the template cannot be compiled or rendered.
pub fn new_collection(
    sitemap_config: &SitemapConfig,
    collection: Collection,
) -> Result<Self, NewUrlError> {
    let data = ItemData {
        id: collection.id.inner().to_owned(),
        slug: collection.slug.clone(),
        typ: ItemType::Collection,
    };
    let mut tt = TinyTemplate::new();
    // Propagate template compile errors instead of dropping them; otherwise the
    // failure only shows up later as a misleading render error.
    tt.add_template("t", &sitemap_config.collection_template)?;
    let url = tt.render("t", &collection)?;
    Ok(Self {
        url,
        data,
        related: None,
    })
}
/// Builds the sitemap entry for a page.
///
/// The URL is produced by rendering `sitemap_config.pages_template` with
/// `page` as the template context. No related item is attached.
///
/// # Errors
/// [`NewUrlError::BadTemplating`] when the template cannot be compiled or rendered.
pub fn new_page(sitemap_config: &SitemapConfig, page: Page) -> Result<Self, NewUrlError> {
    let data = ItemData {
        id: page.id.inner().to_owned(),
        slug: page.slug.clone(),
        typ: ItemType::Page,
    };
    let mut tt = TinyTemplate::new();
    // Propagate template compile errors instead of dropping them; otherwise the
    // failure only shows up later as a misleading render error.
    tt.add_template("t", &sitemap_config.pages_template)?;
    let url = tt.render("t", &page)?;
    Ok(Self {
        url,
        data,
        related: None,
    })
}
}
/// Errors returned by the `Url::new_*` constructors.
#[derive(thiserror::Error, Debug)]
pub enum NewUrlError {
/// A value needed to build the entry was `None`; `Url::new_product` returns this when the product has no category.
#[error("Some property inside passed data for new url was None, but should've been Some")]
MissingData,
/// Wraps an error from `tinytemplate`; `#[from]` lets `?` convert it automatically.
#[error("Bad templates or wrong context data to fill out the template")]
BadTemplating(#[from] tinytemplate::error::Error),
}

View file

@ -1,40 +0,0 @@
#[cfg(test)]
mod test {
    use crate::sitemap::{RefType, Url, UrlSet};

    /// Round-trips a `UrlSet` through its XML file form and checks that nothing is lost.
    // Without `#[test]` the harness never ran this function.
    #[test]
    fn urlset_serialisation_isnt_lossy() {
        // `UrlSet::new` unwraps this variable, so it has to be set before the call.
        std::env::set_var("APP_API_BASE_URL", "http://localhost:3000");
        let mut url_set = UrlSet::new();
        url_set.url.extend([
            Url::new(
                "category1coolid".to_string(),
                "category1".to_string(),
                RefType::Category,
            ),
            // `Url::new` takes (id, slug, typ); the id comes first.
            Url::new(
                "Collection1coolid".to_string(),
                "Collection1".to_string(),
                RefType::Collection,
            ),
            Url::new_with_ref(
                "category1coolid".to_string(),
                "category1".to_string(),
                RefType::Product,
                Some("product1coolid".to_string()),
                Some("product1".to_string()),
                Some(RefType::Category),
            ),
            Url::new_with_ref(
                "category2coolid".to_string(),
                "category2".to_string(),
                RefType::Product,
                Some("product2coolid".to_string()),
                Some("product2".to_string()),
                Some(RefType::Category),
            ),
        ]);
        let file_str = url_set.to_file().unwrap();
        let deserialized_url_set: UrlSet = quick_xml::de::from_str(&file_str).unwrap();
        assert_eq!(url_set, deserialized_url_set);
    }
}

View file

@ -0,0 +1,169 @@
use crate::{
app::{trace_to_std, SitemapConfig},
create_app,
queries::event_subjects_updated::{Category, Product, ProductUpdated},
sitemap::{RefType, Url, UrlSet},
};
use axum::{
body::Body,
extract::path::ErrorKind,
http::{Request, StatusCode},
routing::RouterIntoService,
Json, Router,
};
use rstest::*;
use saleor_app_sdk::{apl::AplType, config::Config};
use tower::{MakeService, Service, ServiceExt};
use tracing::Level;
/// Calls `trace_to_std` with a fixed, TRACE-level `Config` for use in tests.
///
/// # Panics
/// Panics if `trace_to_std` returns an error.
fn init_tracing() {
    let local_base_url = "http://localhost:3000";
    let test_config = Config {
        apl: AplType::File,
        apl_url: String::from("redis://localhost:6379"),
        log_level: Level::TRACE,
        app_api_base_url: local_base_url.to_owned(),
        app_iframe_base_url: local_base_url.to_owned(),
        required_saleor_version: String::from("^3.13"),
    };
    trace_to_std(&test_config).unwrap();
}
/// Builds a fresh application service for one test.
///
/// Removes and recreates `./temp/sitemaps`, sets `APP_API_BASE_URL`, then passes a
/// hard-coded `Config` and `SitemapConfig` to `create_app` and converts the
/// resulting router into a service.
///
/// # Panics
/// Panics if the directory cannot be removed (for any reason other than it not
/// existing) or cannot be recreated.
async fn init_test_app() -> RouterIntoService<Body> {
    const SITEMAP_DIR: &str = "./temp/sitemaps";
    const LOCAL_BASE_URL: &str = "http://localhost:3000";

    // A missing directory is fine; any other failure aborts the test.
    if let Err(e) = std::fs::remove_dir_all(SITEMAP_DIR) {
        if e.kind() != std::io::ErrorKind::NotFound {
            panic!("{:?}", e);
        }
    }
    std::fs::create_dir_all(SITEMAP_DIR).unwrap();
    std::env::set_var("APP_API_BASE_URL", LOCAL_BASE_URL);

    let config = Config {
        apl: AplType::File,
        apl_url: String::from("redis://localhost:6379"),
        log_level: Level::TRACE,
        app_api_base_url: LOCAL_BASE_URL.to_owned(),
        app_iframe_base_url: LOCAL_BASE_URL.to_owned(),
        required_saleor_version: String::from("^3.13"),
    };
    let sitemap_config = SitemapConfig {
        target_folder: SITEMAP_DIR.to_owned(),
        pages_template: String::from("https://example.com/{page.slug}"),
        index_hostname: String::from("https://example.com"),
        product_template: String::from(
            "https://example.com/{product.category.slug}/{product.slug}",
        ),
        category_template: String::from("https://example.com/{category.slug}"),
        collection_template: String::from("https://example.com/collection/{collection.slug}"),
    };

    let router = create_app(&config, sitemap_config).await;
    router.into_service::<Body>()
}
/// A request to `/` must be answered with `200 OK`.
#[rstest]
async fn index_returns_ok() {
    let mut app = init_test_app().await;
    let request = Request::builder().uri("/").body(Body::empty()).unwrap();
    let response = app.ready().await.unwrap().call(request).await.unwrap();
    assert_eq!(response.status(), StatusCode::OK);
}
/// Sends a `ProductUpdated` payload to `/api/webhooks` and compares the sitemap
/// file on disk with the `UrlSet` expected for that product.
#[rstest]
async fn updates_xml_from_product() {
    let mut app = init_test_app().await;

    let product_id = cynic::Id::new("product1".to_owned());
    let product_slug = "product1slug".to_owned();
    let category_id = cynic::Id::new("category1".to_owned());
    let category_slug = "category1slug".to_owned();

    let payload = ProductUpdated {
        product: Some(Product {
            id: product_id.clone(),
            slug: product_slug.clone(),
            category: Some(Category {
                slug: category_slug.clone(),
                id: category_id.clone(),
            }),
        }),
    };
    let body = serde_json::to_string_pretty(&payload).unwrap();
    let request = Request::builder()
        .uri("/api/webhooks")
        .body(Body::from(body))
        .unwrap();

    let response = app.ready().await.unwrap().call(request).await.unwrap();
    assert_eq!(response.status(), StatusCode::OK);

    // NOTE(review): the file is named `1.xml` but is parsed with `serde_json`, while the
    // sibling round-trip test uses `quick_xml` — confirm which on-disk format is intended.
    let sitemap_contents = std::fs::read_to_string("./temp/sitemaps/1.xml").unwrap();
    let xml: UrlSet = serde_json::from_str(&sitemap_contents).unwrap();

    let expected_url = Url::new_with_ref(
        product_id.inner().to_owned(),
        product_slug.clone(),
        RefType::Product,
        Some(category_id.inner().to_owned()),
        Some(category_slug.clone()),
        Some(RefType::Category),
    );
    let mut webhook_url_set = UrlSet::new();
    webhook_url_set.url = vec![expected_url];
    assert_eq!(xml, webhook_url_set);
}
/// Serialises a `UrlSet` to XML with `quick_xml` and checks that deserialising
/// the result yields an equal value.
#[rstest]
fn urlset_serialisation_isnt_lossy() {
    std::env::set_var("APP_API_BASE_URL", "http://localhost:3000");
    init_tracing();
    let mut url_set = UrlSet::new();
    // `extend` takes the items directly, so no temporary `Vec` is needed.
    url_set.url.extend([
        Url::new(
            "category1coolid".to_string(),
            "category1".to_string(),
            RefType::Category,
        ),
        Url::new(
            "Collection1coolid".to_string(),
            "Collection1".to_string(),
            RefType::Collection,
        ),
        Url::new_with_ref(
            "category1coolid".to_string(),
            "category1".to_string(),
            RefType::Product,
            Some("product1coolid".to_string()),
            Some("product1".to_string()),
            Some(RefType::Category),
        ),
        Url::new_with_ref(
            "category2coolid".to_string(),
            "category2".to_string(),
            RefType::Product,
            Some("product2coolid".to_string()),
            Some("product2".to_string()),
            Some(RefType::Category),
        ),
    ]);
    let file_str = quick_xml::se::to_string(&url_set).unwrap();
    let deserialized_url_set: UrlSet = quick_xml::de::from_str(&file_str).unwrap();
    assert_eq!(url_set, deserialized_url_set);
}

9
tests-compose.yml Normal file
View file

@ -0,0 +1,9 @@
services:
redis:
image: bitnami/redis:latest
environment:
- ALLOW_EMPTY_PASSWORD=yes
- DISABLE_COMMANDS=FLUSHDB,FLUSHALL,CONFIG
ports:
- 6379:6379
restart: unless-stopped