mix/database: add keep-followed option for object pruning
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pr/test/2 Pipeline was successful
ci/woodpecker/pr/test/1 Pipeline was successful
ci/woodpecker/pr/build-arm64 Pipeline was successful
ci/woodpecker/pr/build-amd64 Pipeline was successful
ci/woodpecker/pr/docs Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/pull_request_closed/test/1 Pipeline was successful
ci/woodpecker/pull_request_closed/test/2 Pipeline was successful
ci/woodpecker/pull_request_closed/build-arm64 Pipeline was successful
ci/woodpecker/pull_request_closed/build-amd64 Pipeline was successful
ci/woodpecker/pull_request_closed/docs Pipeline was successful

This allows retaining posts and boosts of remote actors with local
followers regardless of age.
With the "full" setting this can be taken further, treating such
followed actors just like local users, even keeping all posts they
liked or reacted to.
This commit is contained in:
Oneric 2025-04-04 03:42:15 +02:00
parent 8576ec42ec
commit 516827c356
3 changed files with 153 additions and 1 deletions

View file

@ -48,7 +48,10 @@ This will prune remote posts older than 90 days (configurable with [`config :ple
### Options
- `--keep-threads` - Don't prune posts when they are part of a thread where at least one post has seen local interaction (e.g. one of the posts is a local post, or is favourited by a local user, or has been repeated by a local user...). It also wont delete posts when at least one of the posts in that thread has seen recent activity.
- `--keep-followed <mode>` - If set to `posts`, all posts and boosts of users with local followers will be kept.
If set to `full`, it will additionally keep any posts such users interacted with; this requires `--keep-threads`.
By default this is set to `none` and followed users are not treated specially.
- `--keep-threads` - Don't prune posts when they are part of a thread where at least one post has seen local interaction (e.g. one of the posts is a local post, or is favourited by a local user, or has been repeated by a local user...). It also won't delete posts when at least one of the posts in the thread has seen recent activity or is kept due to `--keep-followed`.
- `--keep-non-public` - Keep non-public posts like DMs and followers-only posts, even if they are remote.
- `--limit` - limits how many remote posts get pruned. This limit does **not** apply to any of the follow up jobs. If wanting to keep the database load in check it is thus advisable to run the standalone `prune_orphaned_activities` task with a limit afterwards instead of passing `--prune-orphaned-activities` to this task.
- `--prune-orphaned-activities` - Also prune orphaned activities afterwards. Activities are things like Like, Create, Announce, Flag (aka reports)... They can significantly help reduce the database size.

View file

@ -135,6 +135,55 @@ defp query_pinned_object_ids() do
|> select([o], o.id)
end
# Builds a query selecting the ActivityPub id (as `%{ap_id: ...}` rows) of
# every non-local user who is the target of an accepted follow from a local
# user. Intended to be embedded via `subquery/1`; no de-duplication is done,
# so a remote user followed by several local users yields several rows.
defp query_followed_remote_user_apids() do
  Pleroma.FollowingRelationship
  |> join(:inner, [r], followed in User, on: r.following_id == followed.id)
  |> join(:inner, [r], follower in User, on: r.follower_id == follower.id)
  |> where(
    [r, followed, follower],
    r.state == :follow_accept and follower.local and not followed.local
  )
  |> select([_r, followed], %{ap_id: followed.ap_id})
end
# Translates the raw `--keep-followed` string from the parsed CLI options into
# the internal mode: `:full`, `:posts`, or `false` (for "none" or when the
# option was not given). Raises a RuntimeError on any other value.
defp parse_keep_followed_arg(options) do
  mode = Keyword.get(options, :keep_followed)

  cond do
    mode == "full" -> :full
    mode == "posts" -> :posts
    mode in ["none", nil] -> false
    true -> raise "Invalid argument for keep_followed! Must be 'full', 'posts' or 'none'"
  end
end
# Adds a HAVING condition to an aggregated activity query so that groups
# touched by followed remote users are excluded from pruning, depending on the
# already-parsed `:keep_followed` mode in `options`:
#
#   * `:full`  - every activity in the group must come from an actor that is
#                not a followed remote user
#   * `:posts` - the group must not contain a Create or Announce activity
#                authored by a followed remote user
#   * other    - the query is returned untouched
defp maybe_restrict_followed_activities(query, options) do
  restrict_followed_activities(query, Keyword.get(options, :keep_followed))
end

# Any activity at all by a followed remote user protects the whole group.
defp restrict_followed_activities(query, :full) do
  having(
    query,
    [act],
    fragment(
      "bool_and(?->>'actor' NOT IN ?)",
      act.data,
      subquery(query_followed_remote_user_apids())
    )
  )
end

# Only posts and boosts by a followed remote user protect the group.
defp restrict_followed_activities(query, :posts) do
  having(
    query,
    [act],
    not fragment(
      "bool_or(?->>'actor' IN ? AND ?->>'type' = ANY('{Create,Announce}'))",
      act.data,
      subquery(query_followed_remote_user_apids()),
      act.data
    )
  )
end

defp restrict_followed_activities(query, _mode), do: query
defp deletable_objects_keeping_threads(time_deadline, limit_cnt, options) do
# We want to delete objects from threads where
# 1. the newest post is still old
@ -166,6 +215,7 @@ defp deletable_objects_keeping_threads(time_deadline, limit_cnt, options) do
|> having([a], max(a.updated_at) < ^time_deadline)
|> having([a], not fragment("bool_or(?)", a.local))
|> having([_, b], fragment("max(?::text) is null", b.id))
|> maybe_restrict_followed_activities(options)
|> maybe_limit(limit_cnt)
|> select([a], fragment("? ->> 'context'::text", a.data))
@ -195,6 +245,17 @@ defp deletable_objects_breaking_threads(time_deadline, limit_cnt, options) do
[o],
fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host())
)
|> then(fn q ->
if Keyword.get(options, :keep_followed) do
where(
q,
[o],
fragment("?->>'actor'", o.data) not in subquery(query_followed_remote_user_apids())
)
else
q
end
end)
|> maybe_limit(limit_cnt)
|> select([o], o.id)
@ -274,6 +335,7 @@ def run(["prune_objects" | args]) do
args,
strict: [
vacuum: :boolean,
keep_followed: :string,
keep_threads: :boolean,
keep_non_public: :boolean,
prune_orphaned_activities: :boolean,
@ -282,6 +344,13 @@ def run(["prune_objects" | args]) do
]
)
# Normalise --keep-followed into :full | :posts | false and store it back so
# the query builders can branch on the parsed value.
kf = parse_keep_followed_arg(options)
options = Keyword.put(options, :keep_followed, kf)

# `:keep_threads` is absent (nil) unless the flag was passed; default it to
# false because `not nil` raises an ArgumentError and would hide the
# explanatory message below.
if kf == :full and not Keyword.get(options, :keep_threads, false) do
  raise "keep_followed=full only works in conjunction with keep_threads!"
end
start_pleroma()
deadline = Pleroma.Config.get([:instance, :remote_post_retention_days])
@ -292,6 +361,7 @@ def run(["prune_objects" | args]) do
"Pruning objects older than #{deadline} days"
|> maybe_concat(Keyword.get(options, :keep_non_public), ", keeping non public posts")
|> maybe_concat(Keyword.get(options, :keep_threads), ", keeping threads intact")
|> maybe_concat(kf, ", keeping #{kf} activities of followed users")
|> maybe_concat(Keyword.get(options, :prune_pinned), ", pruning pinned posts")
|> maybe_concat(
Keyword.get(options, :prune_orphaned_activities),

View file

@ -419,6 +419,85 @@ test "with the --keep-threads option it keeps old threads with bookmarked posts"
assert length(Repo.all(Object)) == 1
end
# Shared fixture for the --keep-followed tests.
#
# Creates a remote user followed (and accepted) by a local user, plus an
# unrelated remote user. The followed user authors one post and likes a post
# of the unrelated user. All created activities and objects are back-dated to
# `old_insert_date` and the activities are flagged as non-local.
#
# Returns `{followed_users_post_object_id, liked_post_object_id}`.
defp prepare_keep_followed_test(old_insert_date) do
  followed_remote = insert(:user, local: false)
  local_follower = insert(:user, local: true)
  unrelated_remote = insert(:user, local: false)

  CommonAPI.follow(local_follower, followed_remote)
  CommonAPI.accept_follow_request(local_follower, followed_remote)

  assert :follow_accept ==
           Pleroma.FollowingRelationship.get(local_follower, followed_remote).state

  # Marks an activity as remote and back-dates it.
  age_activity = fn activity ->
    activity
    |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
    |> Repo.update!()
  end

  # Back-dates an object.
  age_object = fn object ->
    object
    |> Ecto.Changeset.change(%{updated_at: old_insert_date})
    |> Repo.update!()
  end

  {:ok, followed_post} =
    CommonAPI.post(followed_remote, %{status: "some thing", local: false})

  age_activity.(followed_post)
  age_object.(followed_post.object)

  {:ok, liked_post} =
    CommonAPI.post(unrelated_remote, %{status: "boo!", local: false})

  {:ok, like} = CommonAPI.favorite(followed_remote, liked_post.id)

  age_activity.(liked_post)
  age_object.(liked_post.object)
  age_activity.(like)

  assert length(Repo.all(Object)) == 2

  {followed_post.object.id, liked_post.object.id}
end
test "by default does not keep posts of followed users", %{old_insert_date: cutoff} do
  # Without --keep-followed, both fixture objects are expected to be pruned.
  prepare_keep_followed_test(cutoff)

  Mix.Tasks.Pleroma.Database.run(["prune_objects"])

  assert Repo.all(Object) == []
end
test "with the --keep-followed posts option it keeps old posts of followed users", %{
  old_insert_date: cutoff
} do
  {followed_post_id, liked_post_id} = prepare_keep_followed_test(cutoff)

  task_args = ["prune_objects", "--keep-followed", "posts"]
  Mix.Tasks.Pleroma.Database.run(task_args)

  # Exactly one object survives: the followed user's own post. The post the
  # followed user merely liked is pruned in "posts" mode.
  assert [_sole_survivor] = Repo.all(Object)
  assert Object.get_by_id(followed_post_id)
  refute Object.get_by_id(liked_post_id)
end
test "with the --keep-followed full option it keeps old posts liked by a followed user", %{
  old_insert_date: cutoff
} do
  prepare_keep_followed_test(cutoff)

  # "full" mode is only accepted together with --keep-threads.
  task_args = ["prune_objects", "--keep-followed", "full", "--keep-threads"]
  Mix.Tasks.Pleroma.Database.run(task_args)

  # Both the followed user's post and the post they liked must remain.
  assert [_, _] = Repo.all(Object)
end
test "We don't have unexpected tables which may contain objects that are referenced by activities" do
# We can delete orphaned activities. For that we look for the objects they reference in the 'objects', 'activities', and 'users' table.
# If someone adds another table with objects (idk, maybe with separate relations, or collections or w/e), then we need to make sure we