From 2e384dcda92a6e6c2c9161d3a9c993f70df8c198 Mon Sep 17 00:00:00 2001 From: Oneric Date: Wed, 15 May 2024 03:38:58 +0200 Subject: [PATCH] [TEST] dbprune: fuzzymode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Splitting it up hopefully helps with load and OOMs (on smaller VPSes). But untested. Also: technically this can delete activities referencing actually existing objects, but atm that won't happen for anything we understand in the first place. But it risks future changes leading to loss of (remote) data..... Local activities are never affected, though. Less severely, this might also miss some technically deletable entries if they’ll later get dealt with by the regular auto pruning anyway. --- .../docs/administration/CLI_tasks/database.md | 1 + lib/mix/tasks/pleroma/database.ex | 96 ++++++++++++++++++- 2 files changed, 95 insertions(+), 2 deletions(-) diff --git a/docs/docs/administration/CLI_tasks/database.md b/docs/docs/administration/CLI_tasks/database.md index 580c9d32b..8c134e086 100644 --- a/docs/docs/administration/CLI_tasks/database.md +++ b/docs/docs/administration/CLI_tasks/database.md @@ -82,6 +82,7 @@ when all orphaned activities have been deleted. - `--limit n` - Only delete up to `n` activities in each query making up this job, i.e. if this job runs two queries at most `2n` activities will be deleted. Running this task repeatedly in limited batches can help maintain the instance’s responsiveness while still freeing up some space. 
- `--no-singles` - Do not delete activites referencing single objects - `--no-arrays` - Do not delete activites referencing an array of objects +- `--fuzzy` - Run fuzzy delete subqueries where supported; potentially more resource friendly but might miss a few activities or rarely delete some actually referenced remote activities (local data is never deleted) ## Create a conversation for all existing DMs diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index bd545d617..10ab4b7b4 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -36,8 +36,98 @@ defp limit_statement(limit) when is_number(limit) do end end + defp restrict_to_local_singles_type(query, types) do + query + |> where( + [a], + not a.local and + fragment("?->>'type' = ANY(?)", a.data, ^types) and + fragment("jsonb_typeof(?->'object') = 'string'", a.data) + ) + end - defp prune_orphaned_activities_singles(limit) do + defp prune_activity_by_id(idquery) do + from( + a in Pleroma.Activity, + where: a.id in subquery(idquery) + ) + |> Repo.delete_all() + end + + defp prune_orphaned_activities_singles(limit, opts) + + defp prune_orphaned_activities_singles(limit, true = _fuzzy) do + # WARNING: the *_only parts match our current implementation, but other servers + # appear to be more lenient about what those activities can refer to. Atm + # deleting those is no loss since we don't understand them anyway, but + # should this change, this absolutely needs adjusting!!! 
+ qdel_useronly = + from( + a in Pleroma.Activity, + select: a.id, + left_join: u in Pleroma.User, + on: fragment("?->>'object'", a.data) == u.ap_id, + where: is_nil(u.id) + ) + |> restrict_to_local_singles_type(["Block", "Move"]) + |> maybe_limit(limit) + + {del_useronly, _} = prune_activity_by_id(qdel_useronly) + Logger.info("- Deleted #{del_useronly} activities related to users") + + qdel_actonly = + from( + a in Pleroma.Activity, + select: a.id, + left_join: a2 in Pleroma.Activity, + on: fragment("?->>'object' = ?->>'id'", a.data, a2.data), + where: is_nil(a2.id) + ) + |> restrict_to_local_singles_type(["Accept", "Reject"]) + |> maybe_limit(limit) + + {del_actonly, _} = prune_activity_by_id(qdel_actonly) + Logger.info("- Deleted #{del_actonly} activities related to other activities") + + qdel_objonly = + from( + a in Pleroma.Activity, + select: a.id, + left_join: o in Pleroma.Object, + on: fragment("?->>'object' = ?->>'id'", a.data, o.data), + where: is_nil(o.id) + ) + |> restrict_to_local_singles_type(["Like", "EmojiReact", "Announce"]) + |> maybe_limit(limit) + + {del_objonly, _} = prune_activity_by_id(qdel_objonly) + Logger.info("- Deleted #{del_objonly} activities related to non-activity objects") + + qdel_mixed = + from( + a in Pleroma.Activity, + select: a.id, + left_join: o in Pleroma.Object, + on: fragment("?->>'object' = ?->>'id'", a.data, o.data), + left_join: a2 in Pleroma.Activity, + on: fragment("?->>'object' = ?->>'id'", a.data, a2.data), + left_join: u in Pleroma.User, + on: fragment("?->>'object'", a.data) == u.ap_id, + where: + is_nil(o.id) and + is_nil(a2.id) and + is_nil(u.id) + ) + |> restrict_to_local_singles_type(["Create", "Add", "Remove"]) + |> maybe_limit(limit) + + {del_mixed, _} = prune_activity_by_id(qdel_mixed) + Logger.info("- Deleted #{del_mixed} activities related to various types") + + del_useronly + del_actonly + del_objonly + del_mixed + end + + defp prune_orphaned_activities_singles(limit, false = _fuzzy) do %{:num_rows => 
del_single} = """ delete from public.activities @@ -109,7 +199,8 @@ def prune_orphaned_activities(limit \\ 0, opts \\ []) when is_number(limit) do # Prune activities who link to a single object del_single = if Keyword.get(opts, :singles, true) do - prune_orphaned_activities_singles(limit) + fuzzy = Keyword.get(opts, :fuzzy, false) + prune_orphaned_activities_singles(limit, fuzzy) else 0 end @@ -167,6 +258,7 @@ def run(["prune_orphaned_activities" | args]) do limit: :integer, singles: :boolean, arrays: :boolean, + fuzzy: :boolean ] )