mix/database: retain pinned objects by default

Pinned objects and their threads will be refetched
on user refresh which by default happens after a day
once a user is encountered again in any form including a mention.

We observed pruning pinned objects usually results in heavy load for
hours after a database prune due to a clogged up remote fetch queue as
pinned posts and their threads of many (most?) users get refetched.

Thus do not prune pinned posts by default.
Keeping closer to earlier behaviour, this will still prune threads of
pinned posts regardless of --keep-threads if nothing else prevents it.
This commit is contained in:
Oneric 2025-04-04 02:53:09 +02:00
parent be5312228f
commit 8576ec42ec
3 changed files with 95 additions and 1 deletions

View file

@ -48,10 +48,12 @@ This will prune remote posts older than 90 days (configurable with [`config :ple
### Options
- `--keep-threads` - Don't prune posts when they are part of a thread where at least one post has seen local interaction (e.g. one of the posts is a local post, or is favourited by a local user, or has been repeated by a local user...). It also wont delete posts when at least one of the posts in that thread is kept (e.g. because one of the posts has seen recent activity).
- `--keep-threads` - Don't prune posts when they are part of a thread where at least one post has seen local interaction (e.g. one of the posts is a local post, or is favourited by a local user, or has been repeated by a local user...). It also won't delete posts when at least one of the posts in that thread has seen recent activity.
- `--keep-non-public` - Keep non-public posts like DM's and followers-only, even if they are remote.
- `--limit` - limits how many remote posts get pruned. This limit does **not** apply to any of the follow up jobs. If wanting to keep the database load in check it is thus advisable to run the standalone `prune_orphaned_activities` task with a limit afterwards instead of passing `--prune-orphaned-activities` to this task.
- `--prune-orphaned-activities` - Also prune orphaned activities afterwards. Activities are things like Like, Create, Announce, Flag (aka reports)... They can significantly help reduce the database size.
- `--prune-pinned` - Also prune pinned posts; keeping pinned posts does not suffice to protect their threads from pruning, even when using `--keep-threads`.
Note, if using this option and pinned posts are pruned, they and their threads will just be refetched on the next user update. Therefore it usually doesn't bring much gain while incurring a heavy fetch load after pruning.
- `--vacuum` - Run `VACUUM FULL` after the objects are pruned. This should not be used on a regular basis, but is useful if your instance has been running for a long time before pruning.
## Prune orphaned activities from the database

View file

@ -120,6 +120,21 @@ def prune_orphaned_activities(limit \\ 0, opts \\ []) when is_number(limit) do
del_single + del_array
end
# Query yielding one `%{ap_id: key}` row per key found in any user's
# `pinned_objects` JSONB map (expanded with PostgreSQL's `jsonb_object_keys`).
# The resulting `ap_id` values are what object `data->>'id'` gets compared against
# when looking up the pinned objects themselves.
defp query_pinned_object_apids() do
  select(Pleroma.User, [u], %{ap_id: fragment("jsonb_object_keys(?)", u.pinned_objects)})
end
# Query selecting the database ids (`o.id`) of every object whose ActivityPub id
# (`data->>'id'`) appears among the keys returned by `query_pinned_object_apids/0`,
# i.e. the objects currently pinned by some user.
# `prune_objects` uses it as an exclusion set unless `--prune-pinned` is given.
defp query_pinned_object_ids() do
# If this additional level of subquery is omitted and we directly supply AP ids
# to the final query, it appears to overexert PostgreSQL(17)'s planner leading
# to a very inefficient query with enormous memory and time consumption.
# By supplying database IDs it ends up quite cheap however.
Object
|> where([o], fragment("?->>'id' IN ?", o.data, subquery(query_pinned_object_apids())))
|> select([o], o.id)
end
defp deletable_objects_keeping_threads(time_deadline, limit_cnt, options) do
# We want to delete objects from threads where
# 1. the newest post is still old
@ -262,6 +277,7 @@ def run(["prune_objects" | args]) do
keep_threads: :boolean,
keep_non_public: :boolean,
prune_orphaned_activities: :boolean,
prune_pinned: :boolean,
limit: :integer
]
)
@ -276,6 +292,7 @@ def run(["prune_objects" | args]) do
"Pruning objects older than #{deadline} days"
|> maybe_concat(Keyword.get(options, :keep_non_public), ", keeping non public posts")
|> maybe_concat(Keyword.get(options, :keep_threads), ", keeping threads intact")
|> maybe_concat(Keyword.get(options, :prune_pinned), ", pruning pinned posts")
|> maybe_concat(
Keyword.get(options, :prune_orphaned_activities),
", pruning orphaned activities"
@ -293,6 +310,13 @@ def run(["prune_objects" | args]) do
else
deletable_objects_breaking_threads(time_deadline, limit_cnt, options)
end
|> then(fn q ->
if Keyword.get(options, :prune_pinned) do
q
else
where(q, [o], o.id not in subquery(query_pinned_object_ids()))
end
end)
|> Repo.delete_all(timeout: :infinity)
Logger.info("Deleted #{del_obj} objects...")

View file

@ -88,6 +88,74 @@ test "it prunes old objects from the database", %{old_insert_date: old_insert_da
refute Object.get_by_id(note_remote_non_public_id)
end
test "it retains pinned posts by default", %{old_insert_date: old_insert_date} do
  # Control note whose timestamps are left untouched; it is expected to survive.
  insert(:note)

  # Backdated note that a remote user has pinned.
  pin_user = insert(:user, local: false)

  pinned_note =
    insert(:note, user: pin_user)
    |> Ecto.Changeset.change(%{updated_at: old_insert_date})
    |> Repo.update!()

  note_remote_pinned_id = pinned_note.id
  User.add_pinned_object_id(pin_user, pinned_note.data["id"])

  # Backdated note with an emptied "to" list that nobody has pinned.
  unpinned_note = insert(:note)
  note_remote_non_public_id = unpinned_note.id

  unpinned_changes = %{
    updated_at: old_insert_date,
    data: put_in(unpinned_note.data, ["to"], [])
  }

  unpinned_note
  |> Ecto.Changeset.change(unpinned_changes)
  |> Repo.update!()

  assert Object |> Repo.all() |> length() == 3

  Mix.Tasks.Pleroma.Database.run(["prune_objects"])

  # Only the unpinned backdated note is removed; the pinned one is kept.
  assert Object |> Repo.all() |> length() == 2
  assert Object.get_by_id(note_remote_pinned_id)
  refute Object.get_by_id(note_remote_non_public_id)
end
test "it prunes pinned posts with --prune-pinned", %{old_insert_date: old_insert_date} do
  # Control note whose timestamps are left untouched; it is the sole expected survivor.
  insert(:note)

  # Backdated note that a remote user has pinned.
  pin_user = insert(:user, local: false)

  pinned_note =
    insert(:note, user: pin_user)
    |> Ecto.Changeset.change(%{updated_at: old_insert_date})
    |> Repo.update!()

  note_remote_pinned_id = pinned_note.id
  User.add_pinned_object_id(pin_user, pinned_note.data["id"])

  # Backdated note with an emptied "to" list that nobody has pinned.
  unpinned_note = insert(:note)
  note_remote_non_public_id = unpinned_note.id

  unpinned_changes = %{
    updated_at: old_insert_date,
    data: put_in(unpinned_note.data, ["to"], [])
  }

  unpinned_note
  |> Ecto.Changeset.change(unpinned_changes)
  |> Repo.update!()

  assert Object |> Repo.all() |> length() == 3

  Mix.Tasks.Pleroma.Database.run(["prune_objects", "--prune-pinned"])

  # With the flag set, being pinned no longer protects the backdated note.
  assert Object |> Repo.all() |> length() == 1
  refute Object.get_by_id(note_remote_pinned_id)
  refute Object.get_by_id(note_remote_non_public_id)
end
test "it cleans up bookmarks", %{old_insert_date: old_insert_date} do
user = insert(:user)
{:ok, old_object_activity} = CommonAPI.post(user, %{status: "yadayada"})