[TEST] dbprune: fuzzymode

Splitting it up hopefully helps with load and OOMs (on smaller VPSes).
But untested.

Also: technically can delete activities referencing actually existing
objects, but atm won't happen for anything we understand in the first
place. But risks future changes leading to loss of (remote) data.....
Local activities are never affected though

Less severe: this might also miss some technically deletable entries
if they’ll later get dealt with by the regular auto pruning anyway.
This commit is contained in:
Oneric 2024-05-15 03:38:58 +02:00
parent c127d48308
commit 2e384dcda9
2 changed files with 95 additions and 2 deletions

View file

@ -82,6 +82,7 @@ when all orphaned activities have been deleted.
- `--limit n` - Only delete up to `n` activities in each query making up this job, i.e. if this job runs two queries at most `2n` activities will be deleted. Running this task repeatedly in limited batches can help maintain the instance's responsiveness while still freeing up some space.
- `--no-singles` - Do not delete activities referencing single objects
- `--no-arrays` - Do not delete activities referencing an array of objects
- `--fuzzy` - Run fuzzy delete subqueries where supported; potentially more resource friendly but might miss a few activities or rarely delete some actually referenced remote activities (local data is never deleted)
## Create a conversation for all existing DMs

View file

@ -36,8 +36,98 @@ defp limit_statement(limit) when is_number(limit) do
end
end
# Narrows `query` to activities that
#   * are NOT local (note: despite the function name, the filter is `not local`),
#   * have a `type` contained in `types`, and
#   * carry a plain string in their `object` field (i.e. a single reference).
defp restrict_to_local_singles_type(query, types) do
  where(
    query,
    [activity],
    not activity.local and
      fragment("?->>'type' = ANY(?)", activity.data, ^types) and
      fragment("jsonb_typeof(?->'object') = 'string'", activity.data)
  )
end
defp prune_orphaned_activities_singles(limit) do
# Deletes every activity whose id is produced by `idquery` (a query selecting
# activity ids) and returns the result of `Repo.delete_all/1` unchanged.
defp prune_activity_by_id(idquery) do
  Pleroma.Activity
  |> where([activity], activity.id in subquery(idquery))
  |> Repo.delete_all()
end
defp prune_orphaned_activities_singles(limit, opts)
# Fuzzy variant of the single-object prune.
#
# Instead of one combined statement this runs four separate deletes, one per
# group of activity types. Each delete targets non-local activities whose
# string `object` reference has no matching row in the table(s) that type is
# expected to point at. Every id query is passed through `maybe_limit/2` with
# `limit`, the number of rows removed by each step is logged, and the sum of
# all four counts is returned.
defp prune_orphaned_activities_singles(limit, true = _fuzzy) do
  # WARNING: the *_only parts match our current implementation, but other servers
  #          appear to be more lenient about what those activities can refer to.
  #          Atm deleting those is no loss since we don't understand them anyway,
  #          but should this change this absolutely needs adjusting!!!

  # Block/Move: the object is expected to be a user.
  qdel_useronly =
    from(
      a in Pleroma.Activity,
      select: a.id,
      left_join: u in Pleroma.User,
      on: fragment("?->>'object'", a.data) == u.ap_id,
      where: is_nil(u.id)
    )
    |> restrict_to_local_singles_type(["Block", "Move"])
    |> maybe_limit(limit)

  {del_useronly, _} = prune_activity_by_id(qdel_useronly)
  Logger.info("- Deleted #{del_useronly} activities related to users")

  # Accept/Reject: the object is expected to be another activity.
  qdel_actonly =
    from(
      a in Pleroma.Activity,
      select: a.id,
      left_join: a2 in Pleroma.Activity,
      on: fragment("?->>'object' = ?->>'id'", a.data, a2.data),
      where: is_nil(a2.id)
    )
    |> restrict_to_local_singles_type(["Accept", "Reject"])
    |> maybe_limit(limit)

  {del_actonly, _} = prune_activity_by_id(qdel_actonly)
  Logger.info("- Deleted #{del_actonly} activities related to other activities")

  # Like/EmojiReact/Announce: the object is expected to be a non-activity object.
  qdel_objonly =
    from(
      a in Pleroma.Activity,
      select: a.id,
      left_join: o in Pleroma.Object,
      on: fragment("?->>'object' = ?->>'id'", a.data, o.data),
      where: is_nil(o.id)
    )
    |> restrict_to_local_singles_type(["Like", "EmojiReact", "Announce"])
    |> maybe_limit(limit)

  {del_objonly, _} = prune_activity_by_id(qdel_objonly)
  # Report this step's own counter (previously logged del_actonly by mistake).
  Logger.info("- Deleted #{del_objonly} activities related to non-activity objects")

  # Create/Add/Remove: the object may be an object, an activity or a user, so
  # only delete when none of the three tables has a match.
  qdel_mixed =
    from(
      a in Pleroma.Activity,
      select: a.id,
      left_join: o in Pleroma.Object,
      on: fragment("?->>'object' = ?->>'id'", a.data, o.data),
      left_join: a2 in Pleroma.Activity,
      on: fragment("?->>'object' = ?->>'id'", a.data, a2.data),
      left_join: u in Pleroma.User,
      on: fragment("?->>'object'", a.data) == u.ap_id,
      where:
        is_nil(o.id) and
          is_nil(a2.id) and
          is_nil(u.id)
    )
    |> restrict_to_local_singles_type(["Create", "Add", "Remove"])
    |> maybe_limit(limit)

  {del_mixed, _} = prune_activity_by_id(qdel_mixed)
  # Report this step's own counter (previously logged del_actonly by mistake).
  Logger.info("- Deleted #{del_mixed} activities related to various types")

  del_useronly + del_actonly + del_objonly + del_mixed
end
defp prune_orphaned_activities_singles(limit, false = _fuzzy) do
%{:num_rows => del_single} =
"""
delete from public.activities
@ -109,7 +199,8 @@ def prune_orphaned_activities(limit \\ 0, opts \\ []) when is_number(limit) do
# Prune activities who link to a single object
del_single =
if Keyword.get(opts, :singles, true) do
prune_orphaned_activities_singles(limit)
fuzzy = Keyword.get(opts, :fuzzy, false)
prune_orphaned_activities_singles(limit, fuzzy)
else
0
end
@ -167,6 +258,7 @@ def run(["prune_orphaned_activities" | args]) do
limit: :integer,
singles: :boolean,
arrays: :boolean,
fuzzy: :boolean
]
)