From 5800f39c67a696af03dfbd85b23fb9dca91ab940 Mon Sep 17 00:00:00 2001 From: alemi Date: Sun, 29 Dec 2024 02:44:40 +0100 Subject: [PATCH] fix: skip double lemmy image, not any attachment some lemmy posts which had a single link attachment would get that attachment stripped for this fix. now it should keep those while still stripping the doubled image --- upub/core/src/traits/normalize.rs | 110 ++++++++++++++++-------------- 1 file changed, 58 insertions(+), 52 deletions(-) diff --git a/upub/core/src/traits/normalize.rs b/upub/core/src/traits/normalize.rs index 5637615..c706ff7 100644 --- a/upub/core/src/traits/normalize.rs +++ b/upub/core/src/traits/normalize.rs @@ -72,62 +72,68 @@ impl Normalizer for crate::Context { .await?; } - // TODO this check is a bit disgusting but lemmy for some incomprehensible reason sends us - // the same image twice: once in `image` and once as `attachment`. you may say "well just - // check if url is the same" and i absolutely do but lemmy is 10 steps forwards and it sends - // the same image twice with two distinct links. checkmate fedi developers!!!!! - // so basically i don't want to clutter my timeline with double images, nor fetch every image - // that comes from lemmy (we cloak and lazy-load) just to dedupe it... let attachments = object.attachment().flat(); - if !( - self.cfg().compat.skip_single_attachment_if_image_is_set - && object_model.image.is_some() - && attachments.len() == 1 - ) { - let obj_image = object_model.image.clone().unwrap_or_default(); - for attachment in attachments { - let attachment_model = match attachment { - Node::Empty => continue, - Node::Array(_) => { - tracing::warn!("ignoring array-in-array while processing attachments"); - continue - }, - Node::Object(o) => { - let mut model = AP::attachment_q(o.as_document()?, object_model.internal, None)?; - if let Set(u) | Unchanged(u) = model.url { - if u == obj_image { continue }; - model.url = Set(self.cloaked(&u)); - } - model - }, - Node::Link(l) => { - let url = l.href().unwrap_or_default(); - if url == obj_image { continue }; - let mut media_type = l.media_type().unwrap_or("link".to_string()); - let mut document_type = apb::DocumentType::Page; - if self.cfg().compat.fix_attachment_images_media_type - && [".jpg", ".jpeg", ".png", ".webp", ".bmp"] // TODO more image types??? - .iter() - .any(|x| url.ends_with(x)) - { + let obj_image = object_model.image.clone().unwrap_or_default(); + let attachments_len = attachments.len(); + for attachment in attachments { + let attachment_model = match attachment { + Node::Empty => continue, + Node::Array(_) => { + tracing::warn!("ignoring array-in-array while processing attachments"); + continue + }, + Node::Object(o) => { + let mut model = AP::attachment_q(o.as_document()?, object_model.internal, None)?; + if let Set(u) | Unchanged(u) = model.url { + if u == obj_image { continue }; + model.url = Set(self.cloaked(&u)); + } + model + }, + Node::Link(l) => { + let url = l.href().unwrap_or_default(); + if url == obj_image { continue }; + let mut media_type = l.media_type().unwrap_or("link".to_string()); + let mut document_type = apb::DocumentType::Page; + let mut is_image = false; + if [".jpg", ".jpeg", ".png", ".webp", ".bmp"] // TODO more image types??? + .iter() + .any(|x| url.ends_with(x)) + { + is_image = true; + if self.cfg().compat.fix_attachment_images_media_type { document_type = apb::DocumentType::Image; media_type = format!("image/{}", url.split('.').last().unwrap_or_default()); - } - crate::model::attachment::ActiveModel { - internal: sea_orm::ActiveValue::NotSet, - url: Set(self.cloaked(&url)), - object: Set(object_model.internal), - document_type: Set(document_type), - name: Set(l.name().ok()), - media_type: Set(media_type), - } - }, - }; - crate::model::attachment::Entity::insert(attachment_model) - .exec(tx) - .await?; - } + } + + // TODO this check is a bit disgusting but lemmy for some incomprehensible reason sends us + // the same image twice: once in `image` and once as `attachment`. you may say "well just + // check if url is the same" and i absolutely do but lemmy is 10 steps forwards and it sends + // the same image twice with two distinct links. checkmate fedi developers!!!!! + // so basically i don't want to clutter my timeline with double images, nor fetch every image + // that comes from lemmy (we cloak and lazy-load) just to dedupe it... + if is_image + && self.cfg().compat.skip_single_attachment_if_image_is_set + && object_model.image.is_some() + && attachments_len == 1 + { + continue; + } + + crate::model::attachment::ActiveModel { + internal: sea_orm::ActiveValue::NotSet, + url: Set(self.cloaked(&url)), + object: Set(object_model.internal), + document_type: Set(document_type), + name: Set(l.name().ok()), + media_type: Set(media_type), + } + }, + }; + crate::model::attachment::Entity::insert(attachment_model) + .exec(tx) + .await?; } for tag in object.tag().flat() {