deliver Delete activities to all known instances #198

Merged
toast merged 4 commits from deliver-delete-everyone into main 2022-10-16 13:46:24 +00:00
2 changed files with 49 additions and 6 deletions

View file

@ -8,6 +8,10 @@ interface IRecipe {
type: string;
}
interface IEveryoneRecipe extends IRecipe {
type: 'Everyone';
}
interface IFollowersRecipe extends IRecipe {
type: 'Followers';
}
@ -17,6 +21,9 @@ interface IDirectRecipe extends IRecipe {
to: IRemoteUser;
}
const isEveryone = (recipe: any): recipe is IEveryoneRecipe =>
recipe.type === 'Everyone';
const isFollowers = (recipe: any): recipe is IFollowersRecipe =>
recipe.type === 'Followers';
@ -63,6 +70,13 @@ export default class DeliverManager {
this.addRecipe(recipe);
}
/**
* Add recipe to send this activity to all known sharedInboxes
*/
public addEveryone() {
this.addRecipe({ type: 'Everyone' } as IEveryoneRecipe);
}
/**
* Add recipe
* @param recipe Recipe
@ -82,9 +96,26 @@ export default class DeliverManager {
/*
build inbox list
Process follower recipes first to avoid duplication when processing
direct recipes later.
Processing order matters to avoid duplication.
*/
if (this.recipes.some(r => isEveryone(r))) {
// deliver to all of known network
const sharedInboxes = await Users.createQueryBuilder('users')
Johann150 marked this conversation as resolved
Review

SELECT DISTINCT would be preferable. Overall, it seems that target inboxes are not deduplicated.

`SELECT DISTINCT` would be preferable. Overall, it seems that target inboxes are not deduplicated.
Review

That is what the groupBy in line 111 was meant for. I didn't know, but just checked and there is .distinct(true) available for query builders to achieve SELECT DISTINCT. I'm not sure if it makes a difference to GROUP BY.

The overall deduplication is achieved because inboxes is a Set and not an array.

That is what the `groupBy` in line 111 was meant for. I didn't know, but just checked and there is `.distinct(true)` available for query builders to achieve `SELECT DISTINCT`. I'm not sure if it makes a difference to `GROUP BY`. The overall deduplication is achieved because `inboxes` is a `Set` and not an array.
Review

Best way to check that would be via Postgres EXPLAIN ANALYZE, I believe.

Best way to check that would be via Postgres `EXPLAIN ANALYZE`, I believe.
Review

I ran it myself but I also had a look at this Stackoverflow answer.

result of running EXPLAIN ANALYZE. tl;dr the query plans are identical
misskey=> explain analyze select distinct "sharedInbox" from "user" where "sharedInbox" is not null and "host" is not null;
                                                    QUERY PLAN
------------------------------------------------------------------------------------------------------------------
 HashAggregate  (cost=1482.82..1509.61 rows=2679 width=28) (actual time=14.398..14.724 rows=2684 loops=1)
   Group Key: "sharedInbox"
   Batches: 1  Memory Usage: 369kB
   ->  Seq Scan on "user"  (cost=0.00..1427.35 rows=22187 width=28) (actual time=0.013..9.184 rows=22216 loops=1)
         Filter: (("sharedInbox" IS NOT NULL) AND (host IS NOT NULL))
         Rows Removed by Filter: 144
 Planning Time: 0.798 ms
 Execution Time: 14.933 ms
(8 rows)

misskey=> explain analyze select "sharedInbox" from "user" where "sharedInbox" is not null and "host" is not null group by "sharedInbox";
                                                    QUERY PLAN
------------------------------------------------------------------------------------------------------------------
 HashAggregate  (cost=1482.82..1509.61 rows=2679 width=28) (actual time=14.026..14.350 rows=2684 loops=1)
   Group Key: "sharedInbox"
   Batches: 1  Memory Usage: 369kB
   ->  Seq Scan on "user"  (cost=0.00..1427.35 rows=22187 width=28) (actual time=0.010..8.622 rows=22216 loops=1)
         Filter: (("sharedInbox" IS NOT NULL) AND (host IS NOT NULL))
         Rows Removed by Filter: 144
 Planning Time: 0.124 ms
 Execution Time: 14.491 ms
(8 rows)

misskey=> select version();
                                                          version
---------------------------------------------------------------------------------------------------------------------------
 PostgreSQL 13.8 (Debian 13.8-0+deb11u1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
(1 row)

From the Stackoverflow answer:

For only a few records (f.e. 100000) it doesn't really matter. Both will use the same HashAggregate method. [...]
But for a larger recordset there's a difference. [...]
So in this example the DISTINCT was still faster. [...]

I ran it myself but I also had a look at [this Stackoverflow answer](https://stackoverflow.com/a/70436474). <details><summary>result of running <code>EXPLAIN ANALYZE</code>. tl;dr the query plans are identical</summary> ```text misskey=> explain analyze select distinct "sharedInbox" from "user" where "sharedInbox" is not null and "host" is not null; QUERY PLAN ------------------------------------------------------------------------------------------------------------------ HashAggregate (cost=1482.82..1509.61 rows=2679 width=28) (actual time=14.398..14.724 rows=2684 loops=1) Group Key: "sharedInbox" Batches: 1 Memory Usage: 369kB -> Seq Scan on "user" (cost=0.00..1427.35 rows=22187 width=28) (actual time=0.013..9.184 rows=22216 loops=1) Filter: (("sharedInbox" IS NOT NULL) AND (host IS NOT NULL)) Rows Removed by Filter: 144 Planning Time: 0.798 ms Execution Time: 14.933 ms (8 rows) misskey=> explain analyze select "sharedInbox" from "user" where "sharedInbox" is not null and "host" is not null group by "sharedInbox"; QUERY PLAN ------------------------------------------------------------------------------------------------------------------ HashAggregate (cost=1482.82..1509.61 rows=2679 width=28) (actual time=14.026..14.350 rows=2684 loops=1) Group Key: "sharedInbox" Batches: 1 Memory Usage: 369kB -> Seq Scan on "user" (cost=0.00..1427.35 rows=22187 width=28) (actual time=0.010..8.622 rows=22216 loops=1) Filter: (("sharedInbox" IS NOT NULL) AND (host IS NOT NULL)) Rows Removed by Filter: 144 Planning Time: 0.124 ms Execution Time: 14.491 ms (8 rows) misskey=> select version(); version --------------------------------------------------------------------------------------------------------------------------- PostgreSQL 13.8 (Debian 13.8-0+deb11u1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit (1 row) ``` </details> From the Stackoverflow answer: > For only a few records (f.e. 100000) it doesn't really matter. Both will use the same HashAggregate method. [...] > But for a larger recordset there's a difference. [...] > So in this example the `DISTINCT` was still faster. [...]
Review

Fun thing just happend, postgres 15 came out and...

Queries using SELECT DISTINCT can now be executed in parallel.

Would be nice to see if we're affected.

Fun thing just happend, postgres 15 came out and... > Queries using SELECT DISTINCT can now be executed in parallel. Would be nice to see if we're affected.
.select('users.sharedInbox', 'sharedInbox')
// so we don't have to make our inboxes Set work as hard
.distinct(true)
// can't deliver to unknown shared inbox
.where('users.sharedInbox IS NOT NULL')
// don't deliver to ourselves
.andWhere('users.host IS NOT NULL')
.getRawMany();
for (const inbox of sharedInboxes) {
inboxes.add(inbox.sharedInbox);
}
}
if (this.recipes.some(r => isFollowers(r))) {
// followers deliver
// TODO: SELECT DISTINCT ON ("followerSharedInbox") "followerSharedInbox" みたいな問い合わせにすればよりパフォーマンス向上できそう

View file

@ -10,7 +10,7 @@ import { User, ILocalUser, IRemoteUser } from '@/models/entities/user.js';
import { Note } from '@/models/entities/note.js';
import { Notes, Users, Instances } from '@/models/index.js';
import { notesChart, perUserNotesChart, instanceChart } from '@/services/chart/index.js';
import { deliverToFollowers, deliverToUser } from '@/remote/activitypub/deliver-manager.js';
import DeliverManager from '@/remote/activitypub/deliver-manager.js';
import { countSameRenotes } from '@/misc/count-same-renotes.js';
import { isPureRenote } from '@/misc/renote.js';
import { registerOrFetchInstanceDoc } from '../register-or-fetch-instance-doc.js';
@ -132,10 +132,22 @@ async function getMentionedRemoteUsers(note: Note): Promise<IRemoteUser[]> {
}
async function deliverToConcerned(user: { id: ILocalUser['id']; host: null; }, note: Note, content: any) {
deliverToFollowers(user, content);
deliverToRelays(user, content);
const manager = new DeliverManager(user, content);
const remoteUsers = await getMentionedRemoteUsers(note);
for (const remoteUser of remoteUsers) {
deliverToUser(user, content, remoteUser);
manager.addDirectRecipe(remoteUser);
}
if (['public', 'home', 'followers'].includes(note.visibility)) {
manager.addFollowersRecipe();
}
if (['public', 'home'].includes(note.visibility)) {
manager.addEveryone();
}
await manager.execute();
deliverToRelays(user, content);
}