From f3f176406e5a804ab38c0b8c9b16c8158760e0b0 Mon Sep 17 00:00:00 2001 From: alemi Date: Wed, 26 Jun 2024 04:37:33 +0200 Subject: [PATCH] feat!: store mentions as internal ids this completely breaks all current mentions (unless you want to cook some script to join and fix them) but should mean halving the mentions table size, or close to that. also theres foreign keys so it cascades automatically, at the cost that now we can't store mention links for users we don't know yet --- upub/core/src/model/mention.rs | 2 +- upub/core/src/selector/batch.rs | 71 ++++++++++++++----- upub/core/src/selector/rich.rs | 70 ++++++++++-------- upub/core/src/traits/normalize.rs | 18 ++--- ...000005_create_attachments_tags_mentions.rs | 21 +++--- 5 files changed, 116 insertions(+), 66 deletions(-) diff --git a/upub/core/src/model/mention.rs b/upub/core/src/model/mention.rs index b2a3a9c..a982fb3 100644 --- a/upub/core/src/model/mention.rs +++ b/upub/core/src/model/mention.rs @@ -6,7 +6,7 @@ pub struct Model { #[sea_orm(primary_key)] pub internal: i64, pub object: i64, - pub actor: String, + pub actor: i64, } #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] diff --git a/upub/core/src/selector/batch.rs b/upub/core/src/selector/batch.rs index 2c52270..00aeab3 100644 --- a/upub/core/src/selector/batch.rs +++ b/upub/core/src/selector/batch.rs @@ -39,7 +39,7 @@ impl BatchFillable for Vec { for element in self.iter_mut() { if let Some(ref object) = element.object { if let Some(v) = map.remove(&object.internal) { - element.accept(v); + element.accept(v, tx).await?; } } } @@ -71,7 +71,7 @@ impl BatchFillable for Vec { } for element in self.iter_mut() { if let Some(v) = map.remove(&element.object.internal) { - element.accept(v); + element.accept(v, tx).await?; } } Ok(self) @@ -92,7 +92,7 @@ impl BatchFillable for RichActivity { .filter(E::comparison(vec![obj.internal])) .all(tx) .await?; - self.accept(batch); + self.accept(batch, tx).await?; } Ok(self) } @@ -111,7 +111,7 @@ impl BatchFillable for RichObject { .filter(E::comparison(vec![self.object.internal])) .all(tx) .await?; - self.accept(batch); + self.accept(batch, tx).await?; Ok(self) } } @@ -119,7 +119,9 @@ impl BatchFillable for RichObject { // welcome to interlocking trait hell, enjoy your stay mod hell { - use sea_orm::{sea_query::IntoCondition, ColumnTrait}; + use sea_orm::{sea_query::IntoCondition, ColumnTrait, ConnectionTrait, DbErr, EntityTrait}; + +use crate::selector::rich::{RichHashtag, RichMention}; pub trait BatchFillableComparison { fn comparison(ids: Vec) -> sea_orm::Condition; @@ -165,43 +167,80 @@ mod hell { } } + #[async_trait::async_trait] pub trait BatchFillableAcceptor { - fn accept(&mut self, batch: B); + async fn accept(&mut self, batch: B, tx: &impl ConnectionTrait) -> Result<(), DbErr>; } + #[async_trait::async_trait] impl BatchFillableAcceptor> for super::RichActivity { - fn accept(&mut self, batch: Vec) { + async fn accept(&mut self, batch: Vec, _tx: &impl ConnectionTrait) -> Result<(), DbErr> { self.attachments = Some(batch); + Ok(()) } } + #[async_trait::async_trait] impl BatchFillableAcceptor> for super::RichActivity { - fn accept(&mut self, batch: Vec) { - self.hashtags = Some(batch); + async fn accept(&mut self, batch: Vec, _tx: &impl ConnectionTrait) -> Result<(), DbErr> { + self.hashtags = Some(batch.into_iter().map(|x| RichHashtag { hash: x }).collect()); + Ok(()) } } + #[async_trait::async_trait] impl BatchFillableAcceptor> for super::RichActivity { - fn accept(&mut self, batch: Vec) { - self.mentions = Some(batch); + async fn accept(&mut self, batch: Vec, tx: &impl ConnectionTrait) -> Result<(), DbErr> { + // TODO batch load users from mentions rather than doing for loop + let mut mentions = Vec::new(); + for row in batch { + // TODO filter only needed rows + if let Some(user) = crate::model::actor::Entity::find_by_id(row.actor).one(tx).await? { + mentions.push(RichMention { + mention: row, + fqn: format!("@{}@{}", user.preferred_username, user.domain), + id: user.id, + }); + } + } + self.mentions = Some(mentions); + Ok(()) } } + #[async_trait::async_trait] impl BatchFillableAcceptor> for super::RichObject { - fn accept(&mut self, batch: Vec) { + async fn accept(&mut self, batch: Vec, _tx: &impl ConnectionTrait) -> Result<(), DbErr> { self.attachments = Some(batch); + Ok(()) } } + #[async_trait::async_trait] impl BatchFillableAcceptor> for super::RichObject { - fn accept(&mut self, batch: Vec) { - self.hashtags = Some(batch); + async fn accept(&mut self, batch: Vec, _tx: &impl ConnectionTrait) -> Result<(), DbErr> { + self.hashtags = Some(batch.into_iter().map(|x| RichHashtag { hash: x }).collect()); + Ok(()) } } + #[async_trait::async_trait] impl BatchFillableAcceptor> for super::RichObject { - fn accept(&mut self, batch: Vec) { - self.mentions = Some(batch); + async fn accept(&mut self, batch: Vec, tx: &impl ConnectionTrait) -> Result<(), DbErr> { + // TODO batch load users from mentions rather than doing for loop + let mut mentions = Vec::new(); + for row in batch { + // TODO filter only needed rows + if let Some(user) = crate::model::actor::Entity::find_by_id(row.actor).one(tx).await? { + mentions.push(RichMention { + mention: row, + fqn: format!("@{}@{}", user.preferred_username, user.domain), + id: user.id, + }); + } + } + self.mentions = Some(mentions); + Ok(()) } } } diff --git a/upub/core/src/selector/rich.rs b/upub/core/src/selector/rich.rs index 77f7161..5c83d15 100644 --- a/upub/core/src/selector/rich.rs +++ b/upub/core/src/selector/rich.rs @@ -1,14 +1,42 @@ -use apb::{ActivityMut, LinkMut, ObjectMut}; +use apb::ActivityMut; use sea_orm::{DbErr, EntityName, FromQueryResult, Iden, QueryResult}; +pub struct RichMention { + pub mention: crate::model::mention::Model, + pub id: String, + pub fqn: String, +} + +impl RichMention { + pub fn ap(self) -> serde_json::Value { + use apb::LinkMut; + apb::new() + .set_link_type(Some(apb::LinkType::Mention)) + .set_href(&self.id) + .set_name(Some(&self.fqn)) + } +} + +pub struct RichHashtag { + pub hash: crate::model::hashtag::Model, +} + +impl RichHashtag { + pub fn ap(self) -> serde_json::Value { + use apb::LinkMut; + apb::new() + .set_name(Some(&format!("#{}", self.hash.name))) + .set_link_type(Some(apb::LinkType::Hashtag)) + } +} pub struct RichActivity { pub activity: crate::model::activity::Model, pub object: Option, pub liked: Option, pub attachments: Option>, - pub hashtags: Option>, - pub mentions: Option>, + pub hashtags: Option>, + pub mentions: Option>, } impl FromQueryResult for RichActivity { @@ -24,6 +52,7 @@ impl FromQueryResult for RichActivity { impl RichActivity { pub fn ap(self) -> serde_json::Value { + use apb::ObjectMut; let object = match self.object { None => apb::Node::maybe_link(self.activity.object.clone()), Some(o) => { @@ -31,23 +60,12 @@ impl RichActivity { let mut tags = Vec::new(); if let Some(mentions) = self.mentions { for mention in mentions { - tags.push( - apb::new() - .set_link_type(Some(apb::LinkType::Mention)) - .set_href(&mention.actor) - // TODO do i need to set name? i could join while batch loading or put the @name in - // each mention object... - ); + tags.push(mention.ap()); } } if let Some(hashtags) = self.hashtags { for hash in hashtags { - tags.push( - // TODO ewwww set_name clash and cant use builder, wtf is this - LinkMut::set_name(apb::new(), Some(&format!("#{}", hash.name))) - .set_link_type(Some(apb::LinkType::Hashtag)) - // TODO do we need to set href too? we can't access context here, quite an issue! - ); + tags.push(hash.ap()); } } apb::Node::object( @@ -71,8 +89,8 @@ pub struct RichObject { pub object: crate::model::object::Model, pub liked: Option, pub attachments: Option>, - pub hashtags: Option>, - pub mentions: Option>, + pub hashtags: Option>, + pub mentions: Option>, } impl FromQueryResult for RichObject { @@ -87,27 +105,17 @@ impl FromQueryResult for RichObject { impl RichObject { pub fn ap(self) -> serde_json::Value { + use apb::ObjectMut; // TODO can we avoid repeating this tags code? let mut tags = Vec::new(); if let Some(mentions) = self.mentions { for mention in mentions { - tags.push( - apb::new() - .set_link_type(Some(apb::LinkType::Mention)) - .set_href(&mention.actor) - // TODO do i need to set name? i could join while batch loading or put the @name in - // each mention object... - ); + tags.push(mention.ap()); } } if let Some(hashtags) = self.hashtags { for hash in hashtags { - tags.push( - // TODO ewwww set_name clash and cant use builder, wtf is this - LinkMut::set_name(apb::new(), Some(&format!("#{}", hash.name))) - .set_link_type(Some(apb::LinkType::Hashtag)) - // TODO do we need to set href too? we can't access context here, quite an issue! - ); + tags.push(hash.ap()); } } self.object.ap() diff --git a/upub/core/src/traits/normalize.rs b/upub/core/src/traits/normalize.rs index 0f01dce..d8da478 100644 --- a/upub/core/src/traits/normalize.rs +++ b/upub/core/src/traits/normalize.rs @@ -127,14 +127,16 @@ impl Normalizer for crate::Context { Node::Empty | Node::Object(_) | Node::Array(_) => {}, Node::Link(l) => match l.link_type() { Ok(apb::LinkType::Mention) => { - let model = crate::model::mention::ActiveModel { - internal: NotSet, - object: Set(object_model.internal), - actor: Set(l.href().to_string()), - }; - crate::model::mention::Entity::insert(model) - .exec(tx) - .await?; + if let Some(internal) = crate::model::actor::Entity::ap_to_internal(l.href(), tx).await? { + let model = crate::model::mention::ActiveModel { + internal: NotSet, + object: Set(object_model.internal), + actor: Set(internal), + }; + crate::model::mention::Entity::insert(model) + .exec(tx) + .await?; + } }, Ok(apb::LinkType::Hashtag) => { let hashtag = l.name() diff --git a/upub/migrations/src/m20240524_000005_create_attachments_tags_mentions.rs b/upub/migrations/src/m20240524_000005_create_attachments_tags_mentions.rs index 934efca..677b4cd 100644 --- a/upub/migrations/src/m20240524_000005_create_attachments_tags_mentions.rs +++ b/upub/migrations/src/m20240524_000005_create_attachments_tags_mentions.rs @@ -1,5 +1,7 @@ use sea_orm_migration::prelude::*; +use crate::m20240524_000001_create_actor_activity_object_tables::Actors; + use super::m20240524_000001_create_actor_activity_object_tables::Objects; #[derive(DeriveIden)] @@ -93,16 +95,15 @@ impl MigrationTrait for Migration { .on_update(ForeignKeyAction::Cascade) .on_delete(ForeignKeyAction::Cascade) ) - .col(ColumnDef::new(Mentions::Actor).string().not_null()) - // .foreign_key( - // ForeignKey::create() - // .name("fkey-mentions-actor") - // .from(Mentions::Table, Mentions::Actor) - // .to(Actors::Table, Actors::Internal) - // .on_update(ForeignKeyAction::Cascade) - // .on_delete(ForeignKeyAction::Cascade) - // ) - .col(ColumnDef::new(Mentions::Published).timestamp_with_time_zone().not_null().default(Expr::current_timestamp())) + .col(ColumnDef::new(Mentions::Actor).big_integer().not_null()) + .foreign_key( + ForeignKey::create() + .name("fkey-mentions-actor") + .from(Mentions::Table, Mentions::Actor) + .to(Actors::Table, Actors::Internal) + .on_update(ForeignKeyAction::Cascade) + .on_delete(ForeignKeyAction::Cascade) + ) .to_owned() ) .await?;