mix/database: retain pinned objects by default
Pinned objects and their threads will be refetched on user refresh, which by default happens after a day once a user is encountered again in any form, including a mention. We observed that pruning pinned objects usually results in heavy load for hours after a database prune due to a clogged-up remote fetch queue, as pinned posts and their threads of many (most?) users get refetched. Thus, do not prune pinned posts by default. Keeping closer to earlier behaviour, this will still prune threads of pinned posts regardless of --keep-threads if nothing else prevents it.
This commit is contained in:
parent
be5312228f
commit
8576ec42ec
3 changed files with 95 additions and 1 deletions
|
@ -48,10 +48,12 @@ This will prune remote posts older than 90 days (configurable with [`config :ple
|
|||
|
||||
### Options
|
||||
|
||||
- `--keep-threads` - Don't prune posts when they are part of a thread where at least one post has seen local interaction (e.g. one of the posts is a local post, or is favourited by a local user, or has been repeated by a local user...). It also wont delete posts when at least one of the posts in that thread is kept (e.g. because one of the posts has seen recent activity).
|
||||
- `--keep-threads` - Don't prune posts when they are part of a thread where at least one post has seen local interaction (e.g. one of the posts is a local post, or is favourited by a local user, or has been repeated by a local user...). It also won't delete posts when at least one of the posts in that thread has seen recent activity.
|
||||
- `--keep-non-public` - Keep non-public posts like DMs and followers-only, even if they are remote.
|
||||
- `--limit` - limits how many remote posts get pruned. This limit does **not** apply to any of the follow up jobs. If wanting to keep the database load in check it is thus advisable to run the standalone `prune_orphaned_activities` task with a limit afterwards instead of passing `--prune-orphaned-activities` to this task.
|
||||
- `--prune-orphaned-activities` - Also prune orphaned activities afterwards. Activities are things like Like, Create, Announce, Flag (aka reports)... They can significantly help reduce the database size.
|
||||
- `--prune-pinned` - Also prune pinned posts; keeping pinned posts does not suffice to protect their threads from pruning, even when using `--keep-threads`.
|
||||
Note, if using this option and pinned posts are pruned, they and their threads will just be refetched on the next user update. Therefore it usually doesn't bring much gain while incurring a heavy fetch load after pruning.
|
||||
- `--vacuum` - Run `VACUUM FULL` after the objects are pruned. This should not be used on a regular basis, but is useful if your instance has been running for a long time before pruning.
|
||||
|
||||
## Prune orphaned activities from the database
|
||||
|
|
|
@ -120,6 +120,21 @@ def prune_orphaned_activities(limit \\ 0, opts \\ []) when is_number(limit) do
|
|||
del_single + del_array
|
||||
end
|
||||
|
||||
defp query_pinned_object_apids() do
  # Query over all users that expands each user's `pinned_objects` JSONB map
  # into its keys and exposes every key under the `ap_id` field.
  select(Pleroma.User, [user], %{
    ap_id: fragment("jsonb_object_keys(?)", user.pinned_objects)
  })
end
|
||||
|
||||
defp query_pinned_object_ids() do
  # Resolves the pinned AP ids to database ids of the matching objects.
  #
  # This extra subquery level is deliberate: when the AP ids are supplied
  # directly to the final query, it appears to overexert PostgreSQL(17)'s
  # planner, leading to a very inefficient query with enormous memory and
  # time consumption. Supplying database IDs ends up quite cheap however.
  pinned_objects =
    where(
      Object,
      [obj],
      fragment("?->>'id' IN ?", obj.data, subquery(query_pinned_object_apids()))
    )

  select(pinned_objects, [obj], obj.id)
end
|
||||
|
||||
defp deletable_objects_keeping_threads(time_deadline, limit_cnt, options) do
|
||||
# We want to delete objects from threads where
|
||||
# 1. the newest post is still old
|
||||
|
@ -262,6 +277,7 @@ def run(["prune_objects" | args]) do
|
|||
keep_threads: :boolean,
|
||||
keep_non_public: :boolean,
|
||||
prune_orphaned_activities: :boolean,
|
||||
prune_pinned: :boolean,
|
||||
limit: :integer
|
||||
]
|
||||
)
|
||||
|
@ -276,6 +292,7 @@ def run(["prune_objects" | args]) do
|
|||
"Pruning objects older than #{deadline} days"
|
||||
|> maybe_concat(Keyword.get(options, :keep_non_public), ", keeping non public posts")
|
||||
|> maybe_concat(Keyword.get(options, :keep_threads), ", keeping threads intact")
|
||||
|> maybe_concat(Keyword.get(options, :prune_pinned), ", pruning pinned posts")
|
||||
|> maybe_concat(
|
||||
Keyword.get(options, :prune_orphaned_activities),
|
||||
", pruning orphaned activities"
|
||||
|
@ -293,6 +310,13 @@ def run(["prune_objects" | args]) do
|
|||
else
|
||||
deletable_objects_breaking_threads(time_deadline, limit_cnt, options)
|
||||
end
|
||||
|> then(fn q ->
|
||||
if Keyword.get(options, :prune_pinned) do
|
||||
q
|
||||
else
|
||||
where(q, [o], o.id not in subquery(query_pinned_object_ids()))
|
||||
end
|
||||
end)
|
||||
|> Repo.delete_all(timeout: :infinity)
|
||||
|
||||
Logger.info("Deleted #{del_obj} objects...")
|
||||
|
|
|
@ -88,6 +88,74 @@ test "it prunes old objects from the database", %{old_insert_date: old_insert_da
|
|||
refute Object.get_by_id(note_remote_non_public_id)
|
||||
end
|
||||
|
||||
test "it retains pinned posts by default", %{old_insert_date: old_insert_date} do
  # Control note that is left untouched; the final count shows it survives the prune.
  insert(:note)

  remote_user = insert(:user, local: false)

  # Note authored by the remote user, aged to the old date and then pinned.
  pinned_note = insert(:note, user: remote_user)

  %{id: pinned_id, data: pinned_data} =
    pinned_note
    |> Ecto.Changeset.change(%{updated_at: old_insert_date})
    |> Repo.update!()

  User.add_pinned_object_id(remote_user, pinned_data["id"])

  # Unpinned note, aged to the old date and with its "to" addressing emptied.
  unpinned_note = insert(:note)
  %{id: unpinned_id, data: unpinned_data} = unpinned_note

  unpinned_changes = %{
    updated_at: old_insert_date,
    data: put_in(unpinned_data, ["to"], [])
  }

  unpinned_note
  |> Ecto.Changeset.change(unpinned_changes)
  |> Repo.update!()

  assert length(Repo.all(Object)) == 3

  Mix.Tasks.Pleroma.Database.run(["prune_objects"])

  # Only the unpinned old note is gone; the pinned one is still present.
  assert length(Repo.all(Object)) == 2
  assert Object.get_by_id(pinned_id)
  refute Object.get_by_id(unpinned_id)
end
|
||||
|
||||
test "it prunes pinned posts with --prune-pinned", %{old_insert_date: old_insert_date} do
  # Control note that is left untouched; the final count shows it survives the prune.
  insert(:note)

  remote_user = insert(:user, local: false)

  # Note authored by the remote user, aged to the old date and then pinned.
  pinned_note = insert(:note, user: remote_user)

  %{id: pinned_id, data: pinned_data} =
    pinned_note
    |> Ecto.Changeset.change(%{updated_at: old_insert_date})
    |> Repo.update!()

  User.add_pinned_object_id(remote_user, pinned_data["id"])

  # Unpinned note, aged to the old date and with its "to" addressing emptied.
  unpinned_note = insert(:note)
  %{id: unpinned_id, data: unpinned_data} = unpinned_note

  unpinned_changes = %{
    updated_at: old_insert_date,
    data: put_in(unpinned_data, ["to"], [])
  }

  unpinned_note
  |> Ecto.Changeset.change(unpinned_changes)
  |> Repo.update!()

  assert length(Repo.all(Object)) == 3

  Mix.Tasks.Pleroma.Database.run(["prune_objects", "--prune-pinned"])

  # With the flag set, the pinned old note is removed along with the unpinned one.
  assert length(Repo.all(Object)) == 1
  refute Object.get_by_id(pinned_id)
  refute Object.get_by_id(unpinned_id)
end
|
||||
|
||||
test "it cleans up bookmarks", %{old_insert_date: old_insert_date} do
|
||||
user = insert(:user)
|
||||
{:ok, old_object_activity} = CommonAPI.post(user, %{status: "yadayada"})
|
||||
|
|
Loading…
Add table
Reference in a new issue