diff --git a/upub/cli/src/cloak.rs b/upub/cli/src/cloak.rs
index 7670e1e..9a607d5 100644
--- a/upub/cli/src/cloak.rs
+++ b/upub/cli/src/cloak.rs
@@ -21,7 +21,7 @@ pub async fn cloak(ctx: upub::Context, post_contents: bool) -> Result<(), Reques
if post_contents {
let mut stream = upub::model::object::Entity::find()
- .filter(upub::model::object::Column::Content.is_not_null())
+ .filter(upub::model::object::Column::Content.like(" Result<(), Reques
.await?;
while let Some((internal, content)) = stream.try_next().await? {
- let sanitized = mdhtml::safe_html(&content);
+ let sanitized = ctx.sanitize(&content);
if sanitized != content {
let model = upub::model::object::ActiveModel {
internal: Unchanged(internal),
diff --git a/upub/core/Cargo.toml b/upub/core/Cargo.toml
index ebc7434..97beca9 100644
--- a/upub/core/Cargo.toml
+++ b/upub/core/Cargo.toml
@@ -28,6 +28,7 @@ serde-inline-default = "0.2"
toml = "0.8"
uriproxy = { path = "../../utils/uriproxy" }
httpsign = { path = "../../utils/httpsign/" }
+mdhtml = { path = "../../utils/mdhtml/" }
jrd = "0.1"
tracing = "0.1"
sea-orm = { version = "0.12", features = ["macros"] }
diff --git a/upub/core/src/traits/cloak.rs b/upub/core/src/traits/cloak.rs
index 1856c52..f29004d 100644
--- a/upub/core/src/traits/cloak.rs
+++ b/upub/core/src/traits/cloak.rs
@@ -28,10 +28,34 @@ pub trait Cloaker {
Some(url)
}
+
+ fn cloaked(&self, url: &str) -> String;
}
impl Cloaker for crate::Context {
fn secret(&self) -> &str {
&self.cfg().security.proxy_secret
}
+
+ fn cloaked(&self, url: &str) -> String { // returns the "/proxy/{sig}/{url}" address that crate::url! builds for `url`
+ let (sig, url) = self.cloak(url); // `url` is shadowed by the second element of the pair returned by cloak(); presumably a signature plus an encoded form of the URL — TODO confirm against Cloaker::cloak
+ crate::url!(self, "/proxy/{sig}/{url}") // the {sig} and {url} placeholders name the two locals bound on the previous line
+ }
+}
+
+// TODO this shouldn't sit in bare context.rs, but having it here is weird too!!
+impl crate::Context {
+ pub fn sanitize(&self, text: &str) -> String { // passes `text` through an mdhtml::Sanitizer whose callback cloaks non-local values
+ let _ctx = self.clone(); // owned copy so the `move` closure below does not borrow `self`; it IS used despite the underscore prefix
+ mdhtml::Sanitizer::new( // the sanitizer is constructed with a single boxed callback
+ Box::new(move |txt| { // NOTE(review): presumably called with each URL found in the markup — confirm in mdhtml
+ if _ctx.is_local(txt) { // values the context reports as local are handed back unchanged
+ txt.to_string()
+ } else {
+ _ctx.cloaked(txt) // everything else is replaced by its cloaked() form
+ }
+ })
+ )
+ .html(text) // run the sanitizer over `text`; its result is this method's return value
+ }
}
diff --git a/upub/core/src/traits/normalize.rs b/upub/core/src/traits/normalize.rs
index 2c1135e..07278e8 100644
--- a/upub/core/src/traits/normalize.rs
+++ b/upub/core/src/traits/normalize.rs
@@ -1,7 +1,7 @@
use apb::{field::OptionalString, Collection, Document, Endpoints, Node, Object, PublicKey};
use sea_orm::{sea_query::Expr, ActiveModelTrait, ActiveValue::{Unchanged, NotSet, Set}, ColumnTrait, ConnectionTrait, DbErr, EntityTrait, IntoActiveModel, QueryFilter};
-use super::Fetcher;
+use super::{Cloaker, Fetcher};
#[derive(Debug, thiserror::Error)]
pub enum NormalizerError {
@@ -27,10 +27,9 @@ impl Normalizer for crate::Context {
async fn insert_object(&self, object: impl apb::Object, tx: &impl ConnectionTrait) -> Result {
let mut object_model = AP::object(&object)?;
- // TOO should we make sure content only contains a safe subset of html ? frontend does it too
- // if let Some(content) = object_model.content {
- // object_model.content = Some(mdhtml::safe_html(&content));
- // }
+ if let Some(content) = object_model.content {
+ object_model.content = Some(self.sanitize(&content));
+ }
// fix context for remote posts
// > if any link is broken or we get rate limited, the whole insertion fails which is
@@ -79,14 +78,19 @@ impl Normalizer for crate::Context {
},
Node::Link(l) => crate::model::attachment::ActiveModel {
internal: sea_orm::ActiveValue::NotSet,
- url: Set(l.href().unwrap_or_default().to_string()),
+ url: Set(self.cloaked(l.href().unwrap_or_default())),
object: Set(object_model.internal),
document_type: Set(apb::DocumentType::Page),
name: Set(l.name().str()),
media_type: Set(l.media_type().unwrap_or("link").to_string()),
},
- Node::Object(o) =>
- AP::attachment_q(o.as_document()?, object_model.internal, None)?,
+ Node::Object(o) => {
+ let mut model = AP::attachment_q(o.as_document()?, object_model.internal, None)?;
+ if let Set(u) | Unchanged(u) = model.url {
+ model.url = Set(self.cloaked(&u));
+ }
+ model
+ },
};
crate::model::attachment::Entity::insert(attachment_model)
.exec(tx)