Compare commits
2 commits
87144b25eb
...
3d28f93f51
Author | SHA1 | Date | |
---|---|---|---|
3d28f93f51 | |||
b43431cb03 |
5 changed files with 84 additions and 40 deletions
|
@ -21,7 +21,7 @@ pub async fn cloak(ctx: upub::Context, post_contents: bool) -> Result<(), Reques
|
|||
|
||||
if post_contents {
|
||||
let mut stream = upub::model::object::Entity::find()
|
||||
.filter(upub::model::object::Column::Content.is_not_null())
|
||||
.filter(upub::model::object::Column::Content.like("<img"))
|
||||
.select_only()
|
||||
.select_column(upub::model::object::Column::Internal)
|
||||
.select_column(upub::model::object::Column::Content)
|
||||
|
@ -30,7 +30,7 @@ pub async fn cloak(ctx: upub::Context, post_contents: bool) -> Result<(), Reques
|
|||
.await?;
|
||||
|
||||
while let Some((internal, content)) = stream.try_next().await? {
|
||||
let sanitized = mdhtml::safe_html(&content);
|
||||
let sanitized = ctx.sanitize(&content);
|
||||
if sanitized != content {
|
||||
let model = upub::model::object::ActiveModel {
|
||||
internal: Unchanged(internal),
|
||||
|
|
|
@ -28,6 +28,7 @@ serde-inline-default = "0.2"
|
|||
toml = "0.8"
|
||||
uriproxy = { path = "../../utils/uriproxy" }
|
||||
httpsign = { path = "../../utils/httpsign/" }
|
||||
mdhtml = { path = "../../utils/mdhtml/" }
|
||||
jrd = "0.1"
|
||||
tracing = "0.1"
|
||||
sea-orm = { version = "0.12", features = ["macros"] }
|
||||
|
|
|
@ -28,10 +28,34 @@ pub trait Cloaker {
|
|||
|
||||
Some(url)
|
||||
}
|
||||
|
||||
fn cloaked(&self, url: &str) -> String;
|
||||
}
|
||||
|
||||
/// `Cloaker` implementation backed by the application context.
impl Cloaker for crate::Context {
|
||||
/// Returns the `proxy_secret` string from the `security` section of this
/// context's configuration.
fn secret(&self) -> &str {
|
||||
&self.cfg().security.proxy_secret
|
||||
}
|
||||
|
||||
/// Builds the proxied form of `url`: calls `self.cloak(url)` and places the
/// returned pair into the `/proxy/{sig}/{url}` path template.
// NOTE(review): `crate::url!` is defined elsewhere; presumably it prefixes
// the instance base address to the path -- confirm against the macro.
fn cloaked(&self, url: &str) -> String {
|
||||
// `url` is shadowed here by the second element returned from `cloak`
let (sig, url) = self.cloak(url);
|
||||
crate::url!(self, "/proxy/{sig}/{url}")
|
||||
}
|
||||
}
|
||||
|
||||
// TODO this shouldn't sit in bare context.rs but also having it here is weird!!
|
||||
/// HTML sanitization helper tied to the application context.
impl crate::Context {
|
||||
/// Runs `text` through an `mdhtml::Sanitizer` and returns the resulting string.
///
/// The sanitizer is built with a callback that returns its argument unchanged
/// when `is_local` reports it as local, and otherwise replaces it with the
/// result of `cloaked`.
pub fn sanitize(&self, text: &str) -> String {
|
||||
// the boxed `move` closure below takes ownership of this clone
let _ctx = self.clone();
|
||||
mdhtml::Sanitizer::new(
|
||||
Box::new(move |txt| {
|
||||
if _ctx.is_local(txt) {
|
||||
txt.to_string()
|
||||
} else {
|
||||
_ctx.cloaked(txt)
|
||||
}
|
||||
})
|
||||
)
|
||||
.html(text)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use apb::{field::OptionalString, Collection, Document, Endpoints, Node, Object, PublicKey};
|
||||
use sea_orm::{sea_query::Expr, ActiveModelTrait, ActiveValue::{Unchanged, NotSet, Set}, ColumnTrait, ConnectionTrait, DbErr, EntityTrait, IntoActiveModel, QueryFilter};
|
||||
|
||||
use super::Fetcher;
|
||||
use super::{Cloaker, Fetcher};
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum NormalizerError {
|
||||
|
@ -27,10 +27,9 @@ impl Normalizer for crate::Context {
|
|||
async fn insert_object(&self, object: impl apb::Object, tx: &impl ConnectionTrait) -> Result<crate::model::object::Model, NormalizerError> {
|
||||
let mut object_model = AP::object(&object)?;
|
||||
|
||||
// TODO should we make sure content only contains a safe subset of html ? frontend does it too
|
||||
// if let Some(content) = object_model.content {
|
||||
// object_model.content = Some(mdhtml::safe_html(&content));
|
||||
// }
|
||||
if let Some(content) = object_model.content {
|
||||
object_model.content = Some(self.sanitize(&content));
|
||||
}
|
||||
|
||||
// fix context for remote posts
|
||||
// > if any link is broken or we get rate limited, the whole insertion fails which is
|
||||
|
@ -79,14 +78,19 @@ impl Normalizer for crate::Context {
|
|||
},
|
||||
Node::Link(l) => crate::model::attachment::ActiveModel {
|
||||
internal: sea_orm::ActiveValue::NotSet,
|
||||
url: Set(l.href().unwrap_or_default().to_string()),
|
||||
url: Set(self.cloaked(l.href().unwrap_or_default())),
|
||||
object: Set(object_model.internal),
|
||||
document_type: Set(apb::DocumentType::Page),
|
||||
name: Set(l.name().str()),
|
||||
media_type: Set(l.media_type().unwrap_or("link").to_string()),
|
||||
},
|
||||
Node::Object(o) =>
|
||||
AP::attachment_q(o.as_document()?, object_model.internal, None)?,
|
||||
Node::Object(o) => {
|
||||
let mut model = AP::attachment_q(o.as_document()?, object_model.internal, None)?;
|
||||
if let Set(u) | Unchanged(u) = model.url {
|
||||
model.url = Set(self.cloaked(&u));
|
||||
}
|
||||
model
|
||||
},
|
||||
};
|
||||
crate::model::attachment::Entity::insert(attachment_model)
|
||||
.exec(tx)
|
||||
|
|
|
@ -1,15 +1,51 @@
|
|||
use html5ever::tendril::*;
|
||||
use html5ever::tokenizer::{BufferQueue, TagKind, Token, TokenSink, TokenSinkResult, Tokenizer};
|
||||
use html5ever::{tendril::SliceExt, tokenizer::{BufferQueue, TagKind, Token, TokenSink, TokenSinkResult, Tokenizer}};
|
||||
use comrak::{markdown_to_html, Options};
|
||||
|
||||
/// In our case, our sink only contains a tokens vector
|
||||
#[derive(Debug, Clone, Default)]
|
||||
struct Sink {
|
||||
pub media_proxy: Option<String>,
|
||||
/// Boxed callback taking a `&str` and returning an owned `String`.
/// `Sanitizer` stores one of these in its optional `cloaker` field.
pub type Cloaker = Box<dyn Fn(&str) -> String>;
|
||||
|
||||
/// State for one sanitization pass; consumed by `Sanitizer::html` /
/// `Sanitizer::markdown`, which return the contents of `buffer`.
#[derive(Default)]
|
||||
pub struct Sanitizer {
|
||||
/// Optional rewriting callback; `None` when built through `Default`,
/// `Some` when built through `Sanitizer::new`.
pub cloaker: Option<Cloaker>,
|
||||
/// String handed back by `Sanitizer::html` once tokenization has ended.
// NOTE(review): presumably filled by the `TokenSink` implementation -- confirm.
pub buffer: String,
|
||||
}
|
||||
|
||||
impl TokenSink for Sink {
|
||||
/// Processes `text` as HTML with a default `Sanitizer` (no cloaker callback
/// set) and returns the resulting string.
pub fn safe_html(text: &str) -> String {
|
||||
Sanitizer::default().html(text)
|
||||
}
|
||||
|
||||
/// Processes `text` as markdown with a default `Sanitizer` (no cloaker
/// callback set) and returns the resulting string.
pub fn safe_markdown(text: &str) -> String {
|
||||
Sanitizer::default().markdown(text)
|
||||
}
|
||||
|
||||
/// Constructors and entry points for `Sanitizer`.
impl Sanitizer {
|
||||
/// Creates a sanitizer with an empty output buffer and `cloak` stored as
/// its callback.
pub fn new(cloak: Cloaker) -> Self {
|
||||
Self {
|
||||
buffer: String::default(),
|
||||
cloaker: Some(cloak),
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts `text` with `markdown_to_html` using default `Options`, then
/// passes the produced HTML to `html`. Consumes `self`.
pub fn markdown(self, text: &str) -> String {
|
||||
self.html(&markdown_to_html(text, &Options::default()))
|
||||
}
|
||||
|
||||
/// Tokenizes `text` with this sanitizer as the token sink and returns the
/// sink's `buffer` afterwards. Consumes `self`.
// NOTE(review): the `unwrap` below panics if `try_reinterpret` fails --
// confirm this cannot happen for `&str` input.
pub fn html(self, text: &str) -> String {
|
||||
let mut input = BufferQueue::default();
|
||||
input.push_back(text.to_tendril().try_reinterpret().unwrap());
|
||||
|
||||
// `self` becomes the tokenizer's sink; tokenizer options are the defaults
let mut tok = Tokenizer::new(self, Default::default());
|
||||
// the value returned by `feed` is deliberately discarded
let _ = tok.feed(&mut input);
|
||||
|
||||
// leftover input is only logged, not treated as an error
if !input.is_empty() {
|
||||
tracing::warn!("buffer input not empty after processing html");
|
||||
}
|
||||
tok.end();
|
||||
|
||||
tok.sink.buffer
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenSink for Sanitizer {
|
||||
type Handle = ();
|
||||
|
||||
/// Each processed token will be handled by this method
|
||||
|
@ -38,8 +74,8 @@ impl TokenSink for Sink {
|
|||
"img" => for attr in tag.attrs {
|
||||
match attr.name.local.as_ref() {
|
||||
"src" => {
|
||||
let src = if let Some(ref proxy) = self.media_proxy {
|
||||
format!("{proxy}{}", attr.value.as_ref())
|
||||
let src = if let Some(ref cloak) = self.cloaker {
|
||||
cloak(attr.value.as_ref())
|
||||
} else {
|
||||
attr.value.to_string()
|
||||
};
|
||||
|
@ -86,24 +122,3 @@ impl TokenSink for Sink {
|
|||
TokenSinkResult::Continue
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts `text` with `markdown_to_html` using default `Options` and
/// passes the produced HTML to `safe_html`.
pub fn safe_markdown(text: &str) -> String {
|
||||
safe_html(&markdown_to_html(text, &Options::default()))
|
||||
}
|
||||
|
||||
/// Tokenizes `text` with a default-constructed `Sink` and returns the sink's
/// `buffer` afterwards.
// NOTE(review): the `unwrap` below panics if `try_reinterpret` fails --
// confirm this cannot happen for `&str` input.
pub fn safe_html(text: &str) -> String {
|
||||
let mut input = BufferQueue::default();
|
||||
input.push_back(text.to_tendril().try_reinterpret().unwrap());
|
||||
|
||||
let sink = Sink::default();
|
||||
|
||||
let mut tok = Tokenizer::new(sink, Default::default());
|
||||
// the value returned by `feed` is deliberately discarded
let _ = tok.feed(&mut input);
|
||||
|
||||
// leftover input is only logged, not treated as an error
if !input.is_empty() {
|
||||
tracing::warn!("buffer input not empty after processing html");
|
||||
}
|
||||
tok.end();
|
||||
|
||||
tok.sink.buffer
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue