diff --git a/upub/cli/src/cloak.rs b/upub/cli/src/cloak.rs index 7670e1e..9a607d5 100644 --- a/upub/cli/src/cloak.rs +++ b/upub/cli/src/cloak.rs @@ -21,7 +21,7 @@ pub async fn cloak(ctx: upub::Context, post_contents: bool) -> Result<(), Reques if post_contents { let mut stream = upub::model::object::Entity::find() - .filter(upub::model::object::Column::Content.is_not_null()) + .filter(upub::model::object::Column::Content.like(" Result<(), Reques .await?; while let Some((internal, content)) = stream.try_next().await? { - let sanitized = mdhtml::safe_html(&content); + let sanitized = ctx.sanitize(&content); if sanitized != content { let model = upub::model::object::ActiveModel { internal: Unchanged(internal), diff --git a/upub/core/Cargo.toml b/upub/core/Cargo.toml index ebc7434..97beca9 100644 --- a/upub/core/Cargo.toml +++ b/upub/core/Cargo.toml @@ -28,6 +28,7 @@ serde-inline-default = "0.2" toml = "0.8" uriproxy = { path = "../../utils/uriproxy" } httpsign = { path = "../../utils/httpsign/" } +mdhtml = { path = "../../utils/mdhtml/" } jrd = "0.1" tracing = "0.1" sea-orm = { version = "0.12", features = ["macros"] } diff --git a/upub/core/src/traits/cloak.rs b/upub/core/src/traits/cloak.rs index 1856c52..f29004d 100644 --- a/upub/core/src/traits/cloak.rs +++ b/upub/core/src/traits/cloak.rs @@ -28,10 +28,34 @@ pub trait Cloaker { Some(url) } + + fn cloaked(&self, url: &str) -> String; } impl Cloaker for crate::Context { fn secret(&self) -> &str { &self.cfg().security.proxy_secret } + + fn cloaked(&self, url: &str) -> String { + let (sig, url) = self.cloak(url); + crate::url!(self, "/proxy/{sig}/{url}") + } +} + +// TODO this shouldnt sit in bare context.rs but also having it here is weird!! +impl crate::Context { + pub fn sanitize(&self, text: &str) -> String { + let _ctx = self.clone(); + mdhtml::Sanitizer::new( + Box::new(move |txt| { + if _ctx.is_local(txt) { + txt.to_string() + } else { + _ctx.cloaked(txt) + } + }) + ) + .html(text) + } } diff --git a/upub/core/src/traits/normalize.rs b/upub/core/src/traits/normalize.rs index 2c1135e..07278e8 100644 --- a/upub/core/src/traits/normalize.rs +++ b/upub/core/src/traits/normalize.rs @@ -1,7 +1,7 @@ use apb::{field::OptionalString, Collection, Document, Endpoints, Node, Object, PublicKey}; use sea_orm::{sea_query::Expr, ActiveModelTrait, ActiveValue::{Unchanged, NotSet, Set}, ColumnTrait, ConnectionTrait, DbErr, EntityTrait, IntoActiveModel, QueryFilter}; -use super::Fetcher; +use super::{Cloaker, Fetcher}; #[derive(Debug, thiserror::Error)] pub enum NormalizerError { @@ -27,10 +27,9 @@ impl Normalizer for crate::Context { async fn insert_object(&self, object: impl apb::Object, tx: &impl ConnectionTrait) -> Result { let mut object_model = AP::object(&object)?; - // TOO should we make sure content only contains a safe subset of html ? frontend does it too - // if let Some(content) = object_model.content { - // object_model.content = Some(mdhtml::safe_html(&content)); - // } + if let Some(content) = object_model.content { + object_model.content = Some(self.sanitize(&content)); + } // fix context for remote posts // > if any link is broken or we get rate limited, the whole insertion fails which is @@ -79,14 +78,19 @@ impl Normalizer for crate::Context { }, Node::Link(l) => crate::model::attachment::ActiveModel { internal: sea_orm::ActiveValue::NotSet, - url: Set(l.href().unwrap_or_default().to_string()), + url: Set(self.cloaked(l.href().unwrap_or_default())), object: Set(object_model.internal), document_type: Set(apb::DocumentType::Page), name: Set(l.name().str()), media_type: Set(l.media_type().unwrap_or("link").to_string()), }, - Node::Object(o) => - AP::attachment_q(o.as_document()?, object_model.internal, None)?, + Node::Object(o) => { + let mut model = AP::attachment_q(o.as_document()?, object_model.internal, None)?; + if let Set(u) | Unchanged(u) = model.url { + model.url = Set(self.cloaked(&u)); + } + model + }, }; crate::model::attachment::Entity::insert(attachment_model) .exec(tx)