diff --git a/docs/docs/administration/CLI_tasks/database.md b/docs/docs/administration/CLI_tasks/database.md index a6fecb38e..f92cd1a52 100644 --- a/docs/docs/administration/CLI_tasks/database.md +++ b/docs/docs/administration/CLI_tasks/database.md @@ -48,7 +48,10 @@ This will prune remote posts older than 90 days (configurable with [`config :ple ### Options -- `--keep-threads` - Don't prune posts when they are part of a thread where at least one post has seen local interaction (e.g. one of the posts is a local post, or is favourited by a local user, or has been repeated by a local user...). It also wont delete posts when at least one of the posts in that thread has seen recent activity. +- `--keep-followed <mode>` - If set to `posts` all posts and boosts of users with local follows will be kept. + If set to `full` it will additionally keep any posts such users interacted with; this requires `--keep-threads`. + By default this is set to `none` and followed users are not treated specially. +- `--keep-threads` - Don't prune posts when they are part of a thread where at least one post has seen local interaction (e.g. one of the posts is a local post, or is favourited by a local user, or has been repeated by a local user...). It also won’t delete posts when at least one of the posts in the thread has seen recent activity or is kept due to `--keep-followed`. - `--keep-non-public` - Keep non-public posts like DM's and followers-only, even if they are remote. - `--limit` - limits how many remote posts get pruned. This limit does **not** apply to any of the follow up jobs. If wanting to keep the database load in check it is thus advisable to run the standalone `prune_orphaned_activities` task with a limit afterwards instead of passing `--prune-orphaned-activities` to this task. - `--prune-orphaned-activities` - Also prune orphaned activities afterwards. Activities are things like Like, Create, Announce, Flag (aka reports)... They can significantly help reduce the database size. 
diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex
index a746dc8fe..c8b6c2329 100644
--- a/lib/mix/tasks/pleroma/database.ex
+++ b/lib/mix/tasks/pleroma/database.ex
@@ -135,6 +135,63 @@ defp query_pinned_object_ids() do
     |> select([o], o.id)
   end
 
+  # Query for the AP IDs of all remote users that at least one local user
+  # follows with an accepted (`:follow_accept`) relationship.
+  defp query_followed_remote_user_apids() do
+    Pleroma.FollowingRelationship
+    |> join(:inner, [rel], ufing in User, on: rel.following_id == ufing.id)
+    |> join(:inner, [rel], ufer in User, on: rel.follower_id == ufer.id)
+    |> where([rel], rel.state == :follow_accept)
+    |> where([_rel, ufing, ufer], ufer.local and not ufing.local)
+    |> select([_rel, ufing], %{ap_id: ufing.ap_id})
+  end
+
+  # Turns the raw `--keep-followed` string into `:full`, `:posts` or `false`
+  # ("none" or option absent). Raises on any other value.
+  defp parse_keep_followed_arg(options) do
+    case Keyword.get(options, :keep_followed) do
+      "full" -> :full
+      "posts" -> :posts
+      "none" -> false
+      nil -> false
+      _ -> raise "Invalid argument for keep_followed! Must be 'full', 'posts' or 'none'"
+    end
+  end
+
+  # Adds a HAVING condition depending on the parsed `:keep_followed` option:
+  #   :full  - every activity of the group must come from a non-followed actor
+  #   :posts - the group must hold no Create/Announce by a followed remote actor
+  # Any other value returns the query unchanged.
+  defp maybe_restrict_followed_activities(query, options) do
+    case Keyword.get(options, :keep_followed) do
+      :full ->
+        having(
+          query,
+          [a],
+          fragment(
+            "bool_and(?->>'actor' NOT IN ?)",
+            a.data,
+            subquery(query_followed_remote_user_apids())
+          )
+        )
+
+      :posts ->
+        having(
+          query,
+          [a],
+          not fragment(
+            "bool_or(?->>'actor' IN ? AND ?->>'type' = ANY('{Create,Announce}'))",
+            a.data,
+            subquery(query_followed_remote_user_apids()),
+            a.data
+          )
+        )
+
+      _ ->
+        query
+    end
+  end
+
   defp deletable_objects_keeping_threads(time_deadline, limit_cnt, options) do
     # We want to delete objects from threads where
     # 1. the newest post is still old
@@ -166,6 +223,7 @@ defp deletable_objects_keeping_threads(time_deadline, limit_cnt, options) do
       |> having([a], max(a.updated_at) < ^time_deadline)
       |> having([a], not fragment("bool_or(?)", a.local))
       |> having([_, b], fragment("max(?::text) is null", b.id))
+      |> maybe_restrict_followed_activities(options)
       |> maybe_limit(limit_cnt)
       |> select([a], fragment("? ->> 'context'::text", a.data))
 
@@ -195,6 +253,17 @@ defp deletable_objects_breaking_threads(time_deadline, limit_cnt, options) do
         [o],
         fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host())
       )
+      |> then(fn q ->
+        if Keyword.get(options, :keep_followed) do
+          where(
+            q,
+            [o],
+            fragment("?->>'actor'", o.data) not in subquery(query_followed_remote_user_apids())
+          )
+        else
+          q
+        end
+      end)
       |> maybe_limit(limit_cnt)
       |> select([o], o.id)
 
@@ -274,6 +343,7 @@ def run(["prune_objects" | args]) do
         args,
         strict: [
           vacuum: :boolean,
+          keep_followed: :string,
           keep_threads: :boolean,
           keep_non_public: :boolean,
           prune_orphaned_activities: :boolean,
@@ -282,6 +352,15 @@ def run(["prune_objects" | args]) do
         ]
       )
 
+    kf = parse_keep_followed_arg(options)
+    options = Keyword.put(options, :keep_followed, kf)
+
+    # Default to `false`: `not` is strictly boolean and raises ArgumentError on
+    # the `nil` returned when --keep-threads was not passed at all.
+    if kf == :full and not Keyword.get(options, :keep_threads, false) do
+      raise "keep_followed=full only works in conjunction with keep_threads!"
+    end
+
     start_pleroma()
 
     deadline = Pleroma.Config.get([:instance, :remote_post_retention_days])
@@ -292,6 +371,7 @@ def run(["prune_objects" | args]) do
       "Pruning objects older than #{deadline} days"
       |> maybe_concat(Keyword.get(options, :keep_non_public), ", keeping non public posts")
       |> maybe_concat(Keyword.get(options, :keep_threads), ", keeping threads intact")
+      |> maybe_concat(kf, ", keeping #{kf} activities of followed users")
       |> maybe_concat(Keyword.get(options, :prune_pinned), ", pruning pinned posts")
       |> maybe_concat(
         Keyword.get(options, :prune_orphaned_activities),
diff --git a/test/mix/tasks/pleroma/database_test.exs b/test/mix/tasks/pleroma/database_test.exs
index 0b9a9e75f..b1de10c9b 100644
--- a/test/mix/tasks/pleroma/database_test.exs
+++ b/test/mix/tasks/pleroma/database_test.exs
@@ -419,6 +419,91 @@ test "with the --keep-threads option it keeps old threads with bookmarked posts"
       assert length(Repo.all(Object)) == 1
     end
 
+    defp prepare_keep_followed_test(old_insert_date) do
+      remote_user = insert(:user, local: false)
+      local_user = insert(:user, local: true)
+      third_party = insert(:user, local: false)
+
+      CommonAPI.follow(local_user, remote_user)
+      CommonAPI.accept_follow_request(local_user, remote_user)
+
+      assert :follow_accept == Pleroma.FollowingRelationship.get(local_user, remote_user).state
+
+      {:ok, old_remote_post_activity} =
+        CommonAPI.post(remote_user, %{status: "some thing", local: false})
+
+      old_remote_post_activity
+      |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
+      |> Repo.update!()
+
+      old_remote_post_activity.object
+      |> Ecto.Changeset.change(%{updated_at: old_insert_date})
+      |> Repo.update!()
+
+      {:ok, old_liked_post_activity} =
+        CommonAPI.post(third_party, %{status: "boo!", local: false})
+
+      {:ok, old_like_activity} = CommonAPI.favorite(remote_user, old_liked_post_activity.id)
+
+      old_liked_post_activity
+      |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
+      |> Repo.update!()
+
+      old_liked_post_activity.object
+      |> Ecto.Changeset.change(%{updated_at: old_insert_date})
+      |> Repo.update!()
+
+      old_like_activity
+      |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
+      |> Repo.update!()
+
+      assert length(Repo.all(Object)) == 2
+
+      {old_remote_post_activity.object.id, old_liked_post_activity.object.id}
+    end
+
+    test "by default does not keep posts of followed users", %{
+      old_insert_date: old_insert_date
+    } do
+      _ = prepare_keep_followed_test(old_insert_date)
+      Mix.Tasks.Pleroma.Database.run(["prune_objects"])
+      assert length(Repo.all(Object)) == 0
+    end
+
+    test "with the --keep-followed posts option it keeps old posts of followed users", %{
+      old_insert_date: old_insert_date
+    } do
+      {old_remote_post_id, old_liked_post_id} =
+        prepare_keep_followed_test(old_insert_date)
+
+      Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-followed", "posts"])
+
+      assert length(Repo.all(Object)) == 1
+      assert Object.get_by_id(old_remote_post_id)
+      refute Object.get_by_id(old_liked_post_id)
+    end
+
+    test "with the --keep-followed full option it keeps old posts liked by a followed user", %{
+      old_insert_date: old_insert_date
+    } do
+      _ = prepare_keep_followed_test(old_insert_date)
+
+      Mix.Tasks.Pleroma.Database.run([
+        "prune_objects",
+        "--keep-followed",
+        "full",
+        "--keep-threads"
+      ])
+
+      assert length(Repo.all(Object)) == 2
+    end
+
+    test "with the --keep-followed full option it requires --keep-threads" do
+      assert_raise RuntimeError, ~r/keep_threads/, fn ->
+        Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-followed", "full"])
+      end
+    end
+
     test "We don't have unexpected tables which may contain objects that are referenced by activities" do
       # We can delete orphaned activities. For that we look for the objects they reference in the 'objects', 'activities', and 'users' table.
       # If someone adds another table with objects (idk, maybe with separate relations, or collections or w/e), then we need to make sure we