Compare commits
2 commits
87144b25eb
...
3d28f93f51
Author | SHA1 | Date | |
---|---|---|---|
3d28f93f51 | |||
b43431cb03 |
5 changed files with 84 additions and 40 deletions
|
@ -21,7 +21,7 @@ pub async fn cloak(ctx: upub::Context, post_contents: bool) -> Result<(), Reques
|
||||||
|
|
||||||
if post_contents {
|
if post_contents {
|
||||||
let mut stream = upub::model::object::Entity::find()
|
let mut stream = upub::model::object::Entity::find()
|
||||||
.filter(upub::model::object::Column::Content.is_not_null())
|
.filter(upub::model::object::Column::Content.like("<img"))
|
||||||
.select_only()
|
.select_only()
|
||||||
.select_column(upub::model::object::Column::Internal)
|
.select_column(upub::model::object::Column::Internal)
|
||||||
.select_column(upub::model::object::Column::Content)
|
.select_column(upub::model::object::Column::Content)
|
||||||
|
@ -30,7 +30,7 @@ pub async fn cloak(ctx: upub::Context, post_contents: bool) -> Result<(), Reques
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
while let Some((internal, content)) = stream.try_next().await? {
|
while let Some((internal, content)) = stream.try_next().await? {
|
||||||
let sanitized = mdhtml::safe_html(&content);
|
let sanitized = ctx.sanitize(&content);
|
||||||
if sanitized != content {
|
if sanitized != content {
|
||||||
let model = upub::model::object::ActiveModel {
|
let model = upub::model::object::ActiveModel {
|
||||||
internal: Unchanged(internal),
|
internal: Unchanged(internal),
|
||||||
|
|
|
@ -28,6 +28,7 @@ serde-inline-default = "0.2"
|
||||||
toml = "0.8"
|
toml = "0.8"
|
||||||
uriproxy = { path = "../../utils/uriproxy" }
|
uriproxy = { path = "../../utils/uriproxy" }
|
||||||
httpsign = { path = "../../utils/httpsign/" }
|
httpsign = { path = "../../utils/httpsign/" }
|
||||||
|
mdhtml = { path = "../../utils/mdhtml/" }
|
||||||
jrd = "0.1"
|
jrd = "0.1"
|
||||||
tracing = "0.1"
|
tracing = "0.1"
|
||||||
sea-orm = { version = "0.12", features = ["macros"] }
|
sea-orm = { version = "0.12", features = ["macros"] }
|
||||||
|
|
|
@ -28,10 +28,34 @@ pub trait Cloaker {
|
||||||
|
|
||||||
Some(url)
|
Some(url)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn cloaked(&self, url: &str) -> String;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Cloaker for crate::Context {
|
impl Cloaker for crate::Context {
|
||||||
fn secret(&self) -> &str {
|
fn secret(&self) -> &str {
|
||||||
&self.cfg().security.proxy_secret
|
&self.cfg().security.proxy_secret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn cloaked(&self, url: &str) -> String {
|
||||||
|
let (sig, url) = self.cloak(url);
|
||||||
|
crate::url!(self, "/proxy/{sig}/{url}")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: this shouldn't sit in bare context.rs, but having it here is weird too!!
|
||||||
|
impl crate::Context {
|
||||||
|
pub fn sanitize(&self, text: &str) -> String {
|
||||||
|
let _ctx = self.clone();
|
||||||
|
mdhtml::Sanitizer::new(
|
||||||
|
Box::new(move |txt| {
|
||||||
|
if _ctx.is_local(txt) {
|
||||||
|
txt.to_string()
|
||||||
|
} else {
|
||||||
|
_ctx.cloaked(txt)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
)
|
||||||
|
.html(text)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
use apb::{field::OptionalString, Collection, Document, Endpoints, Node, Object, PublicKey};
|
use apb::{field::OptionalString, Collection, Document, Endpoints, Node, Object, PublicKey};
|
||||||
use sea_orm::{sea_query::Expr, ActiveModelTrait, ActiveValue::{Unchanged, NotSet, Set}, ColumnTrait, ConnectionTrait, DbErr, EntityTrait, IntoActiveModel, QueryFilter};
|
use sea_orm::{sea_query::Expr, ActiveModelTrait, ActiveValue::{Unchanged, NotSet, Set}, ColumnTrait, ConnectionTrait, DbErr, EntityTrait, IntoActiveModel, QueryFilter};
|
||||||
|
|
||||||
use super::Fetcher;
|
use super::{Cloaker, Fetcher};
|
||||||
|
|
||||||
#[derive(Debug, thiserror::Error)]
|
#[derive(Debug, thiserror::Error)]
|
||||||
pub enum NormalizerError {
|
pub enum NormalizerError {
|
||||||
|
@ -27,10 +27,9 @@ impl Normalizer for crate::Context {
|
||||||
async fn insert_object(&self, object: impl apb::Object, tx: &impl ConnectionTrait) -> Result<crate::model::object::Model, NormalizerError> {
|
async fn insert_object(&self, object: impl apb::Object, tx: &impl ConnectionTrait) -> Result<crate::model::object::Model, NormalizerError> {
|
||||||
let mut object_model = AP::object(&object)?;
|
let mut object_model = AP::object(&object)?;
|
||||||
|
|
||||||
// TOO should we make sure content only contains a safe subset of html ? frontend does it too
|
if let Some(content) = object_model.content {
|
||||||
// if let Some(content) = object_model.content {
|
object_model.content = Some(self.sanitize(&content));
|
||||||
// object_model.content = Some(mdhtml::safe_html(&content));
|
}
|
||||||
// }
|
|
||||||
|
|
||||||
// fix context for remote posts
|
// fix context for remote posts
|
||||||
// > if any link is broken or we get rate limited, the whole insertion fails which is
|
// > if any link is broken or we get rate limited, the whole insertion fails which is
|
||||||
|
@ -79,14 +78,19 @@ impl Normalizer for crate::Context {
|
||||||
},
|
},
|
||||||
Node::Link(l) => crate::model::attachment::ActiveModel {
|
Node::Link(l) => crate::model::attachment::ActiveModel {
|
||||||
internal: sea_orm::ActiveValue::NotSet,
|
internal: sea_orm::ActiveValue::NotSet,
|
||||||
url: Set(l.href().unwrap_or_default().to_string()),
|
url: Set(self.cloaked(l.href().unwrap_or_default())),
|
||||||
object: Set(object_model.internal),
|
object: Set(object_model.internal),
|
||||||
document_type: Set(apb::DocumentType::Page),
|
document_type: Set(apb::DocumentType::Page),
|
||||||
name: Set(l.name().str()),
|
name: Set(l.name().str()),
|
||||||
media_type: Set(l.media_type().unwrap_or("link").to_string()),
|
media_type: Set(l.media_type().unwrap_or("link").to_string()),
|
||||||
},
|
},
|
||||||
Node::Object(o) =>
|
Node::Object(o) => {
|
||||||
AP::attachment_q(o.as_document()?, object_model.internal, None)?,
|
let mut model = AP::attachment_q(o.as_document()?, object_model.internal, None)?;
|
||||||
|
if let Set(u) | Unchanged(u) = model.url {
|
||||||
|
model.url = Set(self.cloaked(&u));
|
||||||
|
}
|
||||||
|
model
|
||||||
|
},
|
||||||
};
|
};
|
||||||
crate::model::attachment::Entity::insert(attachment_model)
|
crate::model::attachment::Entity::insert(attachment_model)
|
||||||
.exec(tx)
|
.exec(tx)
|
||||||
|
|
|
@ -1,15 +1,51 @@
|
||||||
use html5ever::tendril::*;
|
use html5ever::{tendril::SliceExt, tokenizer::{BufferQueue, TagKind, Token, TokenSink, TokenSinkResult, Tokenizer}};
|
||||||
use html5ever::tokenizer::{BufferQueue, TagKind, Token, TokenSink, TokenSinkResult, Tokenizer};
|
|
||||||
use comrak::{markdown_to_html, Options};
|
use comrak::{markdown_to_html, Options};
|
||||||
|
|
||||||
/// In our case, our sink only contains a tokens vector
|
pub type Cloaker = Box<dyn Fn(&str) -> String>;
|
||||||
#[derive(Debug, Clone, Default)]
|
|
||||||
struct Sink {
|
#[derive(Default)]
|
||||||
pub media_proxy: Option<String>,
|
pub struct Sanitizer {
|
||||||
|
pub cloaker: Option<Cloaker>,
|
||||||
pub buffer: String,
|
pub buffer: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TokenSink for Sink {
|
pub fn safe_html(text: &str) -> String {
|
||||||
|
Sanitizer::default().html(text)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn safe_markdown(text: &str) -> String {
|
||||||
|
Sanitizer::default().markdown(text)
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Sanitizer {
|
||||||
|
pub fn new(cloak: Cloaker) -> Self {
|
||||||
|
Self {
|
||||||
|
buffer: String::default(),
|
||||||
|
cloaker: Some(cloak),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn markdown(self, text: &str) -> String {
|
||||||
|
self.html(&markdown_to_html(text, &Options::default()))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn html(self, text: &str) -> String {
|
||||||
|
let mut input = BufferQueue::default();
|
||||||
|
input.push_back(text.to_tendril().try_reinterpret().unwrap());
|
||||||
|
|
||||||
|
let mut tok = Tokenizer::new(self, Default::default());
|
||||||
|
let _ = tok.feed(&mut input);
|
||||||
|
|
||||||
|
if !input.is_empty() {
|
||||||
|
tracing::warn!("buffer input not empty after processing html");
|
||||||
|
}
|
||||||
|
tok.end();
|
||||||
|
|
||||||
|
tok.sink.buffer
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TokenSink for Sanitizer {
|
||||||
type Handle = ();
|
type Handle = ();
|
||||||
|
|
||||||
/// Each processed token will be handled by this method
|
/// Each processed token will be handled by this method
|
||||||
|
@ -38,8 +74,8 @@ impl TokenSink for Sink {
|
||||||
"img" => for attr in tag.attrs {
|
"img" => for attr in tag.attrs {
|
||||||
match attr.name.local.as_ref() {
|
match attr.name.local.as_ref() {
|
||||||
"src" => {
|
"src" => {
|
||||||
let src = if let Some(ref proxy) = self.media_proxy {
|
let src = if let Some(ref cloak) = self.cloaker {
|
||||||
format!("{proxy}{}", attr.value.as_ref())
|
cloak(attr.value.as_ref())
|
||||||
} else {
|
} else {
|
||||||
attr.value.to_string()
|
attr.value.to_string()
|
||||||
};
|
};
|
||||||
|
@ -86,24 +122,3 @@ impl TokenSink for Sink {
|
||||||
TokenSinkResult::Continue
|
TokenSinkResult::Continue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn safe_markdown(text: &str) -> String {
|
|
||||||
safe_html(&markdown_to_html(text, &Options::default()))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn safe_html(text: &str) -> String {
|
|
||||||
let mut input = BufferQueue::default();
|
|
||||||
input.push_back(text.to_tendril().try_reinterpret().unwrap());
|
|
||||||
|
|
||||||
let sink = Sink::default();
|
|
||||||
|
|
||||||
let mut tok = Tokenizer::new(sink, Default::default());
|
|
||||||
let _ = tok.feed(&mut input);
|
|
||||||
|
|
||||||
if !input.is_empty() {
|
|
||||||
tracing::warn!("buffer input not empty after processing html");
|
|
||||||
}
|
|
||||||
tok.end();
|
|
||||||
|
|
||||||
tok.sink.buffer
|
|
||||||
}
|
|
||||||
|
|
Loading…
Reference in a new issue