From 6f0026a818c178ae243f854cf6dd86ed93836f6e Mon Sep 17 00:00:00 2001
From: alemi <me@alemi.dev>
Date: Sun, 2 Feb 2025 01:25:12 +0100
Subject: [PATCH] fix: infer document type from mimetype

---
 core/src/traits/normalize.rs | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/core/src/traits/normalize.rs b/core/src/traits/normalize.rs
index 9cebcf5..520d1de 100644
--- a/core/src/traits/normalize.rs
+++ b/core/src/traits/normalize.rs
@@ -272,11 +272,27 @@ impl AP {
 		if !matches!(t, apb::BaseType::Object(apb::ObjectType::Document(_))) {
 			return Err(NormalizerError::WrongType(apb::BaseType::Object(apb::ObjectType::Document(apb::DocumentType::Document)), t));
 		}
+
+		// no explicit document type given: try to infer it from the top-level part of the media type (audio/image/video)
+		let mut document_type = document.document_type().ok();
+		if document_type.is_none() {
+			if let Ok(media_type) = document.media_type() {
+				if let Some((t, _mime)) = media_type.split_once('/') {
+					document_type = match t {
+						"audio" => Some(apb::DocumentType::Audio),
+						"image" => Some(apb::DocumentType::Image),
+						"video" => Some(apb::DocumentType::Video),
+						_ => None,
+					}
+				}
+			}
+		}
+
 		Ok(crate::model::attachment::Model {
 			internal: 0,
 			url: document.url().id().unwrap_or_default(),
 			object: parent,
-			document_type: document.as_document().map_or(apb::DocumentType::Document, |x| x.document_type().unwrap_or(apb::DocumentType::Page)),
+			document_type: document_type.unwrap_or(apb::DocumentType::Page),
 			name: document.name().ok(),
 			media_type: document.media_type().unwrap_or("link".to_string()),
 		})