feat!: store mentions as internal ids

This completely breaks all current mentions (unless you want to cook up
a script to join and fix them), but it should mean halving the mentions
table size, or close to that. Also, there are now foreign keys, so updates
and deletes cascade automatically, at the cost that we can no longer store
mention links for users we don't know yet.
This commit is contained in:
əlemi 2024-06-26 04:37:33 +02:00
parent 9822fc3f07
commit f3f176406e
Signed by: alemi
GPG key ID: A4895B84D311642C
5 changed files with 116 additions and 66 deletions

View file

@ -6,7 +6,7 @@ pub struct Model {
#[sea_orm(primary_key)] #[sea_orm(primary_key)]
pub internal: i64, pub internal: i64,
pub object: i64, pub object: i64,
pub actor: String, pub actor: i64,
} }
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]

View file

@ -39,7 +39,7 @@ impl BatchFillable for Vec<RichActivity> {
for element in self.iter_mut() { for element in self.iter_mut() {
if let Some(ref object) = element.object { if let Some(ref object) = element.object {
if let Some(v) = map.remove(&object.internal) { if let Some(v) = map.remove(&object.internal) {
element.accept(v); element.accept(v, tx).await?;
} }
} }
} }
@ -71,7 +71,7 @@ impl BatchFillable for Vec<RichObject> {
} }
for element in self.iter_mut() { for element in self.iter_mut() {
if let Some(v) = map.remove(&element.object.internal) { if let Some(v) = map.remove(&element.object.internal) {
element.accept(v); element.accept(v, tx).await?;
} }
} }
Ok(self) Ok(self)
@ -92,7 +92,7 @@ impl BatchFillable for RichActivity {
.filter(E::comparison(vec![obj.internal])) .filter(E::comparison(vec![obj.internal]))
.all(tx) .all(tx)
.await?; .await?;
self.accept(batch); self.accept(batch, tx).await?;
} }
Ok(self) Ok(self)
} }
@ -111,7 +111,7 @@ impl BatchFillable for RichObject {
.filter(E::comparison(vec![self.object.internal])) .filter(E::comparison(vec![self.object.internal]))
.all(tx) .all(tx)
.await?; .await?;
self.accept(batch); self.accept(batch, tx).await?;
Ok(self) Ok(self)
} }
} }
@ -119,7 +119,9 @@ impl BatchFillable for RichObject {
// welcome to interlocking trait hell, enjoy your stay // welcome to interlocking trait hell, enjoy your stay
mod hell { mod hell {
use sea_orm::{sea_query::IntoCondition, ColumnTrait}; use sea_orm::{sea_query::IntoCondition, ColumnTrait, ConnectionTrait, DbErr, EntityTrait};
use crate::selector::rich::{RichHashtag, RichMention};
pub trait BatchFillableComparison { pub trait BatchFillableComparison {
fn comparison(ids: Vec<i64>) -> sea_orm::Condition; fn comparison(ids: Vec<i64>) -> sea_orm::Condition;
@ -165,43 +167,80 @@ mod hell {
} }
} }
#[async_trait::async_trait]
pub trait BatchFillableAcceptor<B> { pub trait BatchFillableAcceptor<B> {
fn accept(&mut self, batch: B); async fn accept(&mut self, batch: B, tx: &impl ConnectionTrait) -> Result<(), DbErr>;
} }
#[async_trait::async_trait]
impl BatchFillableAcceptor<Vec<crate::model::attachment::Model>> for super::RichActivity { impl BatchFillableAcceptor<Vec<crate::model::attachment::Model>> for super::RichActivity {
fn accept(&mut self, batch: Vec<crate::model::attachment::Model>) { async fn accept(&mut self, batch: Vec<crate::model::attachment::Model>, _tx: &impl ConnectionTrait) -> Result<(), DbErr> {
self.attachments = Some(batch); self.attachments = Some(batch);
Ok(())
} }
} }
#[async_trait::async_trait]
impl BatchFillableAcceptor<Vec<crate::model::hashtag::Model>> for super::RichActivity { impl BatchFillableAcceptor<Vec<crate::model::hashtag::Model>> for super::RichActivity {
fn accept(&mut self, batch: Vec<crate::model::hashtag::Model>) { async fn accept(&mut self, batch: Vec<crate::model::hashtag::Model>, _tx: &impl ConnectionTrait) -> Result<(), DbErr> {
self.hashtags = Some(batch); self.hashtags = Some(batch.into_iter().map(|x| RichHashtag { hash: x }).collect());
Ok(())
} }
} }
#[async_trait::async_trait]
impl BatchFillableAcceptor<Vec<crate::model::mention::Model>> for super::RichActivity { impl BatchFillableAcceptor<Vec<crate::model::mention::Model>> for super::RichActivity {
fn accept(&mut self, batch: Vec<crate::model::mention::Model>) { async fn accept(&mut self, batch: Vec<crate::model::mention::Model>, tx: &impl ConnectionTrait) -> Result<(), DbErr> {
self.mentions = Some(batch); // TODO batch load users from mentions rather than doing for loop
let mut mentions = Vec::new();
for row in batch {
// TODO filter only needed rows
if let Some(user) = crate::model::actor::Entity::find_by_id(row.actor).one(tx).await? {
mentions.push(RichMention {
mention: row,
fqn: format!("@{}@{}", user.preferred_username, user.domain),
id: user.id,
});
}
}
self.mentions = Some(mentions);
Ok(())
} }
} }
#[async_trait::async_trait]
impl BatchFillableAcceptor<Vec<crate::model::attachment::Model>> for super::RichObject { impl BatchFillableAcceptor<Vec<crate::model::attachment::Model>> for super::RichObject {
fn accept(&mut self, batch: Vec<crate::model::attachment::Model>) { async fn accept(&mut self, batch: Vec<crate::model::attachment::Model>, _tx: &impl ConnectionTrait) -> Result<(), DbErr> {
self.attachments = Some(batch); self.attachments = Some(batch);
Ok(())
} }
} }
#[async_trait::async_trait]
impl BatchFillableAcceptor<Vec<crate::model::hashtag::Model>> for super::RichObject { impl BatchFillableAcceptor<Vec<crate::model::hashtag::Model>> for super::RichObject {
fn accept(&mut self, batch: Vec<crate::model::hashtag::Model>) { async fn accept(&mut self, batch: Vec<crate::model::hashtag::Model>, _tx: &impl ConnectionTrait) -> Result<(), DbErr> {
self.hashtags = Some(batch); self.hashtags = Some(batch.into_iter().map(|x| RichHashtag { hash: x }).collect());
Ok(())
} }
} }
#[async_trait::async_trait]
impl BatchFillableAcceptor<Vec<crate::model::mention::Model>> for super::RichObject { impl BatchFillableAcceptor<Vec<crate::model::mention::Model>> for super::RichObject {
fn accept(&mut self, batch: Vec<crate::model::mention::Model>) { async fn accept(&mut self, batch: Vec<crate::model::mention::Model>, tx: &impl ConnectionTrait) -> Result<(), DbErr> {
self.mentions = Some(batch); // TODO batch load users from mentions rather than doing for loop
let mut mentions = Vec::new();
for row in batch {
// TODO filter only needed rows
if let Some(user) = crate::model::actor::Entity::find_by_id(row.actor).one(tx).await? {
mentions.push(RichMention {
mention: row,
fqn: format!("@{}@{}", user.preferred_username, user.domain),
id: user.id,
});
}
}
self.mentions = Some(mentions);
Ok(())
} }
} }
} }

View file

@ -1,14 +1,42 @@
use apb::{ActivityMut, LinkMut, ObjectMut}; use apb::ActivityMut;
use sea_orm::{DbErr, EntityName, FromQueryResult, Iden, QueryResult}; use sea_orm::{DbErr, EntityName, FromQueryResult, Iden, QueryResult};
/// A mention row paired with the display data of the actor it points to.
///
/// Mention rows now store the actor's internal numeric id, so the actor's
/// public identifier and handle are carried alongside the row here.
pub struct RichMention {
/// The underlying row from the mentions table.
pub mention: crate::model::mention::Model,
/// The mentioned actor's `id` field; used as the link `href` by [`RichMention::ap`].
pub id: String,
/// Handle of the mentioned actor; the batch-fill code in this commit builds it
/// as `@{preferred_username}@{domain}`.
pub fqn: String,
}
impl RichMention {
/// Consumes the mention and builds its `serde_json::Value` tag representation:
/// a link of type `Mention` whose href is `self.id` and whose name is `self.fqn`.
pub fn ap(self) -> serde_json::Value {
// Imported locally rather than at module level; presumably so `set_name`
// resolves to the `LinkMut` method without clashing with `ObjectMut` —
// NOTE(review): confirm against the apb traits.
use apb::LinkMut;
apb::new()
.set_link_type(Some(apb::LinkType::Mention))
.set_href(&self.id)
.set_name(Some(&self.fqn))
}
}
/// Thin wrapper around a hashtag row, giving it an `ap()` serializer
/// alongside [`RichMention`].
pub struct RichHashtag {
/// The underlying row from the hashtags table.
pub hash: crate::model::hashtag::Model,
}
impl RichHashtag {
/// Consumes the hashtag and builds its `serde_json::Value` tag representation:
/// a link of type `Hashtag` whose name is the stored tag name prefixed with `#`.
///
/// No href is set on the resulting link.
pub fn ap(self) -> serde_json::Value {
// Imported locally rather than at module level; presumably so `set_name`
// resolves to the `LinkMut` method without clashing with `ObjectMut` —
// NOTE(review): confirm against the apb traits.
use apb::LinkMut;
apb::new()
.set_name(Some(&format!("#{}", self.hash.name)))
.set_link_type(Some(apb::LinkType::Hashtag))
}
}
pub struct RichActivity { pub struct RichActivity {
pub activity: crate::model::activity::Model, pub activity: crate::model::activity::Model,
pub object: Option<crate::model::object::Model>, pub object: Option<crate::model::object::Model>,
pub liked: Option<i64>, pub liked: Option<i64>,
pub attachments: Option<Vec<crate::model::attachment::Model>>, pub attachments: Option<Vec<crate::model::attachment::Model>>,
pub hashtags: Option<Vec<crate::model::hashtag::Model>>, pub hashtags: Option<Vec<RichHashtag>>,
pub mentions: Option<Vec<crate::model::mention::Model>>, pub mentions: Option<Vec<RichMention>>,
} }
impl FromQueryResult for RichActivity { impl FromQueryResult for RichActivity {
@ -24,6 +52,7 @@ impl FromQueryResult for RichActivity {
impl RichActivity { impl RichActivity {
pub fn ap(self) -> serde_json::Value { pub fn ap(self) -> serde_json::Value {
use apb::ObjectMut;
let object = match self.object { let object = match self.object {
None => apb::Node::maybe_link(self.activity.object.clone()), None => apb::Node::maybe_link(self.activity.object.clone()),
Some(o) => { Some(o) => {
@ -31,23 +60,12 @@ impl RichActivity {
let mut tags = Vec::new(); let mut tags = Vec::new();
if let Some(mentions) = self.mentions { if let Some(mentions) = self.mentions {
for mention in mentions { for mention in mentions {
tags.push( tags.push(mention.ap());
apb::new()
.set_link_type(Some(apb::LinkType::Mention))
.set_href(&mention.actor)
// TODO do i need to set name? i could join while batch loading or put the @name in
// each mention object...
);
} }
} }
if let Some(hashtags) = self.hashtags { if let Some(hashtags) = self.hashtags {
for hash in hashtags { for hash in hashtags {
tags.push( tags.push(hash.ap());
// TODO ewwww set_name clash and cant use builder, wtf is this
LinkMut::set_name(apb::new(), Some(&format!("#{}", hash.name)))
.set_link_type(Some(apb::LinkType::Hashtag))
// TODO do we need to set href too? we can't access context here, quite an issue!
);
} }
} }
apb::Node::object( apb::Node::object(
@ -71,8 +89,8 @@ pub struct RichObject {
pub object: crate::model::object::Model, pub object: crate::model::object::Model,
pub liked: Option<i64>, pub liked: Option<i64>,
pub attachments: Option<Vec<crate::model::attachment::Model>>, pub attachments: Option<Vec<crate::model::attachment::Model>>,
pub hashtags: Option<Vec<crate::model::hashtag::Model>>, pub hashtags: Option<Vec<RichHashtag>>,
pub mentions: Option<Vec<crate::model::mention::Model>>, pub mentions: Option<Vec<RichMention>>,
} }
impl FromQueryResult for RichObject { impl FromQueryResult for RichObject {
@ -87,27 +105,17 @@ impl FromQueryResult for RichObject {
impl RichObject { impl RichObject {
pub fn ap(self) -> serde_json::Value { pub fn ap(self) -> serde_json::Value {
use apb::ObjectMut;
// TODO can we avoid repeating this tags code? // TODO can we avoid repeating this tags code?
let mut tags = Vec::new(); let mut tags = Vec::new();
if let Some(mentions) = self.mentions { if let Some(mentions) = self.mentions {
for mention in mentions { for mention in mentions {
tags.push( tags.push(mention.ap());
apb::new()
.set_link_type(Some(apb::LinkType::Mention))
.set_href(&mention.actor)
// TODO do i need to set name? i could join while batch loading or put the @name in
// each mention object...
);
} }
} }
if let Some(hashtags) = self.hashtags { if let Some(hashtags) = self.hashtags {
for hash in hashtags { for hash in hashtags {
tags.push( tags.push(hash.ap());
// TODO ewwww set_name clash and cant use builder, wtf is this
LinkMut::set_name(apb::new(), Some(&format!("#{}", hash.name)))
.set_link_type(Some(apb::LinkType::Hashtag))
// TODO do we need to set href too? we can't access context here, quite an issue!
);
} }
} }
self.object.ap() self.object.ap()

View file

@ -127,14 +127,16 @@ impl Normalizer for crate::Context {
Node::Empty | Node::Object(_) | Node::Array(_) => {}, Node::Empty | Node::Object(_) | Node::Array(_) => {},
Node::Link(l) => match l.link_type() { Node::Link(l) => match l.link_type() {
Ok(apb::LinkType::Mention) => { Ok(apb::LinkType::Mention) => {
let model = crate::model::mention::ActiveModel { if let Some(internal) = crate::model::actor::Entity::ap_to_internal(l.href(), tx).await? {
internal: NotSet, let model = crate::model::mention::ActiveModel {
object: Set(object_model.internal), internal: NotSet,
actor: Set(l.href().to_string()), object: Set(object_model.internal),
}; actor: Set(internal),
crate::model::mention::Entity::insert(model) };
.exec(tx) crate::model::mention::Entity::insert(model)
.await?; .exec(tx)
.await?;
}
}, },
Ok(apb::LinkType::Hashtag) => { Ok(apb::LinkType::Hashtag) => {
let hashtag = l.name() let hashtag = l.name()

View file

@ -1,5 +1,7 @@
use sea_orm_migration::prelude::*; use sea_orm_migration::prelude::*;
use crate::m20240524_000001_create_actor_activity_object_tables::Actors;
use super::m20240524_000001_create_actor_activity_object_tables::Objects; use super::m20240524_000001_create_actor_activity_object_tables::Objects;
#[derive(DeriveIden)] #[derive(DeriveIden)]
@ -93,16 +95,15 @@ impl MigrationTrait for Migration {
.on_update(ForeignKeyAction::Cascade) .on_update(ForeignKeyAction::Cascade)
.on_delete(ForeignKeyAction::Cascade) .on_delete(ForeignKeyAction::Cascade)
) )
.col(ColumnDef::new(Mentions::Actor).string().not_null()) .col(ColumnDef::new(Mentions::Actor).big_integer().not_null())
// .foreign_key( .foreign_key(
// ForeignKey::create() ForeignKey::create()
// .name("fkey-mentions-actor") .name("fkey-mentions-actor")
// .from(Mentions::Table, Mentions::Actor) .from(Mentions::Table, Mentions::Actor)
// .to(Actors::Table, Actors::Internal) .to(Actors::Table, Actors::Internal)
// .on_update(ForeignKeyAction::Cascade) .on_update(ForeignKeyAction::Cascade)
// .on_delete(ForeignKeyAction::Cascade) .on_delete(ForeignKeyAction::Cascade)
// ) )
.col(ColumnDef::new(Mentions::Published).timestamp_with_time_zone().not_null().default(Expr::current_timestamp()))
.to_owned() .to_owned()
) )
.await?; .await?;