From 516827c356d3c1fae0b79419c1e33eb08ef7a682 Mon Sep 17 00:00:00 2001 From: Oneric Date: Fri, 4 Apr 2025 03:42:15 +0200 Subject: [PATCH] mix/database: add keep-followed option for object pruning This allows retaining posts and boosts of remote actors with local follows regardless of age. With the "full" setting this can be taken further, treating such followed actors just like local users and even keeping all posts they liked or reacted to. --- .../docs/administration/CLI_tasks/database.md | 5 +- lib/mix/tasks/pleroma/database.ex | 70 ++++++++++++++++ test/mix/tasks/pleroma/database_test.exs | 79 +++++++++++++++++++ 3 files changed, 153 insertions(+), 1 deletion(-) diff --git a/docs/docs/administration/CLI_tasks/database.md b/docs/docs/administration/CLI_tasks/database.md index a6fecb38e..f92cd1a52 100644 --- a/docs/docs/administration/CLI_tasks/database.md +++ b/docs/docs/administration/CLI_tasks/database.md @@ -48,7 +48,10 @@ This will prune remote posts older than 90 days (configurable with [`config :ple ### Options -- `--keep-threads` - Don't prune posts when they are part of a thread where at least one post has seen local interaction (e.g. one of the posts is a local post, or is favourited by a local user, or has been repeated by a local user...). It also wont delete posts when at least one of the posts in that thread has seen recent activity. +- `--keep-followed <mode>` - If set to `posts` all posts and boosts of users with local follows will be kept. + If set to `full` it will additionally keep any posts such users interacted with; this requires `--keep-threads`. + By default this is set to `none` and followed users are not treated specially. +- `--keep-threads` - Don't prune posts when they are part of a thread where at least one post has seen local interaction (e.g. one of the posts is a local post, or is favourited by a local user, or has been repeated by a local user...). 
It also won’t delete posts when at least one of the posts in the thread has seen recent activity or is kept due to `--keep-followed`. - `--keep-non-public` - Keep non-public posts like DM's and followers-only, even if they are remote. - `--limit` - limits how many remote posts get pruned. This limit does **not** apply to any of the follow up jobs. If wanting to keep the database load in check it is thus advisable to run the standalone `prune_orphaned_activities` task with a limit afterwards instead of passing `--prune-orphaned-activities` to this task. - `--prune-orphaned-activities` - Also prune orphaned activities afterwards. Activities are things like Like, Create, Announce, Flag (aka reports)... They can significantly help reduce the database size. diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index a746dc8fe..c8b6c2329 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -135,6 +135,55 @@ defp query_pinned_object_ids() do |> select([o], o.id) end + defp query_followed_remote_user_apids() do + Pleroma.FollowingRelationship + |> join(:inner, [rel], ufing in User, on: rel.following_id == ufing.id) + |> join(:inner, [rel], ufer in User, on: rel.follower_id == ufer.id) + |> where([rel], rel.state == :follow_accept) + |> where([_rel, ufing, ufer], ufer.local and not ufing.local) + |> select([_rel, ufing], %{ap_id: ufing.ap_id}) + end + + defp parse_keep_followed_arg(options) do + case Keyword.get(options, :keep_followed) do + "full" -> :full + "posts" -> :posts + "none" -> false + nil -> false + _ -> raise "Invalid argument for keep_followed! 
Must be 'full', 'posts' or 'none'" + end + end + + defp maybe_restrict_followed_activities(query, options) do + case Keyword.get(options, :keep_followed) do + :full -> + having( + query, + [a], + fragment( + "bool_and(?->>'actor' NOT IN ?)", + a.data, + subquery(query_followed_remote_user_apids()) + ) + ) + + :posts -> + having( + query, + [a], + not fragment( + "bool_or(?->>'actor' IN ? AND ?->>'type' = ANY('{Create,Announce}'))", + a.data, + subquery(query_followed_remote_user_apids()), + a.data + ) + ) + + _ -> + query + end + end + defp deletable_objects_keeping_threads(time_deadline, limit_cnt, options) do # We want to delete objects from threads where # 1. the newest post is still old @@ -166,6 +215,7 @@ defp deletable_objects_keeping_threads(time_deadline, limit_cnt, options) do |> having([a], max(a.updated_at) < ^time_deadline) |> having([a], not fragment("bool_or(?)", a.local)) |> having([_, b], fragment("max(?::text) is null", b.id)) + |> maybe_restrict_followed_activities(options) |> maybe_limit(limit_cnt) |> select([a], fragment("? 
->> 'context'::text", a.data)) @@ -195,6 +245,17 @@ defp deletable_objects_breaking_threads(time_deadline, limit_cnt, options) do [o], fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host()) ) + |> then(fn q -> + if Keyword.get(options, :keep_followed) do + where( + q, + [o], + fragment("?->>'actor'", o.data) not in subquery(query_followed_remote_user_apids()) + ) + else + q + end + end) |> maybe_limit(limit_cnt) |> select([o], o.id) @@ -274,6 +335,7 @@ def run(["prune_objects" | args]) do args, strict: [ vacuum: :boolean, + keep_followed: :string, keep_threads: :boolean, keep_non_public: :boolean, prune_orphaned_activities: :boolean, @@ -282,6 +344,13 @@ def run(["prune_objects" | args]) do ] ) + kf = parse_keep_followed_arg(options) + options = Keyword.put(options, :keep_followed, kf) + + if kf == :full and not Keyword.get(options, :keep_threads, false) do + raise "keep_followed=full only works in conjunction with keep_threads!" + end + start_pleroma() deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) @@ -292,6 +361,7 @@ def run(["prune_objects" | args]) do "Pruning objects older than #{deadline} days" |> maybe_concat(Keyword.get(options, :keep_non_public), ", keeping non public posts") |> maybe_concat(Keyword.get(options, :keep_threads), ", keeping threads intact") + |> maybe_concat(kf, ", keeping #{kf} activities of followed users") |> maybe_concat(Keyword.get(options, :prune_pinned), ", pruning pinned posts") |> maybe_concat( Keyword.get(options, :prune_orphaned_activities), diff --git a/test/mix/tasks/pleroma/database_test.exs b/test/mix/tasks/pleroma/database_test.exs index 0b9a9e75f..b1de10c9b 100644 --- a/test/mix/tasks/pleroma/database_test.exs +++ b/test/mix/tasks/pleroma/database_test.exs @@ -419,6 +419,85 @@ test "with the --keep-threads option it keeps old threads with bookmarked posts" assert length(Repo.all(Object)) == 1 end + defp prepare_keep_followed_test(old_insert_date) do + remote_user = 
insert(:user, local: false) + local_user = insert(:user, local: true) + third_party = insert(:user, local: false) + + CommonAPI.follow(local_user, remote_user) + CommonAPI.accept_follow_request(local_user, remote_user) + + assert :follow_accept == Pleroma.FollowingRelationship.get(local_user, remote_user).state + + {:ok, old_remote_post_activity} = + CommonAPI.post(remote_user, %{status: "some thing", local: false}) + + old_remote_post_activity + |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date}) + |> Repo.update!() + + old_remote_post_activity.object + |> Ecto.Changeset.change(%{updated_at: old_insert_date}) + |> Repo.update!() + + {:ok, old_liked_post_activity} = + CommonAPI.post(third_party, %{status: "boo!", local: false}) + + {:ok, old_like_activity} = CommonAPI.favorite(remote_user, old_liked_post_activity.id) + + old_liked_post_activity + |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date}) + |> Repo.update!() + + old_liked_post_activity.object + |> Ecto.Changeset.change(%{updated_at: old_insert_date}) + |> Repo.update!() + + old_like_activity + |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date}) + |> Repo.update!() + + assert length(Repo.all(Object)) == 2 + + {old_remote_post_activity.object.id, old_liked_post_activity.object.id} + end + + test "by default does not keep posts of followed users", %{ + old_insert_date: old_insert_date + } do + _ = prepare_keep_followed_test(old_insert_date) + Mix.Tasks.Pleroma.Database.run(["prune_objects"]) + assert length(Repo.all(Object)) == 0 + end + + test "with the --keep-followed posts option it keeps old posts of followed users", %{ + old_insert_date: old_insert_date + } do + {old_remote_post_id, old_liked_post_id} = + prepare_keep_followed_test(old_insert_date) + + Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-followed", "posts"]) + + assert length(Repo.all(Object)) == 1 + assert Object.get_by_id(old_remote_post_id) + refute 
Object.get_by_id(old_liked_post_id) + end + + test "with the --keep-followed full option it keeps old posts liked by a followed user", %{ + old_insert_date: old_insert_date + } do + _ = prepare_keep_followed_test(old_insert_date) + + Mix.Tasks.Pleroma.Database.run([ + "prune_objects", + "--keep-followed", + "full", + "--keep-threads" + ]) + + assert length(Repo.all(Object)) == 2 + end + test "We don't have unexpected tables which may contain objects that are referenced by activities" do # We can delete orphaned activities. For that we look for the objects they reference in the 'objects', 'activities', and 'users' table. # If someone adds another table with objects (idk, maybe with separate relations, or collections or w/e), then we need to make sure we