From aeaebb566c9e9e5b34075b2700a5fb4bd0d0e3c9 Mon Sep 17 00:00:00 2001 From: Oneric Date: Wed, 15 May 2024 02:15:31 +0200 Subject: [PATCH] dbprune: allow splitting array and single activity prunes The former is typically just a few reports; it doesn't make sense to rerun it over and over again in batched prunes or if a full prune OOMed. --- .../docs/administration/CLI_tasks/database.md | 2 + lib/mix/tasks/pleroma/database.ex | 77 ++++++++++++------- 2 files changed, 52 insertions(+), 27 deletions(-) diff --git a/docs/docs/administration/CLI_tasks/database.md b/docs/docs/administration/CLI_tasks/database.md index bbf29fc60..580c9d32b 100644 --- a/docs/docs/administration/CLI_tasks/database.md +++ b/docs/docs/administration/CLI_tasks/database.md @@ -80,6 +80,8 @@ when all orphaned activities have been deleted. ### Options - `--limit n` - Only delete up to `n` activities in each query making up this job, i.e. if this job runs two queries at most `2n` activities will be deleted. Running this task repeatedly in limited batches can help maintain the instance’s responsiveness while still freeing up some space. 
+- `--no-singles` - Do not delete activities referencing single objects +- `--no-arrays` - Do not delete activities referencing an array of objects ## Create a conversation for all existing DMs diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index b4709fa3a..8bf4b38ca 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -28,28 +28,15 @@ defp maybe_limit(query, limit_cnt) do end end - def prune_orphaned_activities(limit \\ 0) when is_number(limit) do - limit_arg = - if limit > 0 do - "LIMIT #{limit}" - else - "" - end + defp limit_statement(limit) when is_number(limit) do + if limit > 0 do + "LIMIT #{limit}" + else + "" + end + end - # Activities can either refer to a single object id, and array of object ids - # or contain an inlined object (at least after going through our normalisation) - # - # Flag is the only type we support with an array (and always has arrays). - # Update the only one with inlined objects. - # - # We already regularly purge old Delete, Undo, Update and Remove and if - # rejected Follow requests anyway; no need to explicitly deal with those here. - # - # Since there’s an index on types and there are typically only few Flag - # activites, it’s _much_ faster to utilise the index. To avoid accidentally - # deleting useful activities should more types be added, keep typeof for singles. 
- - # Prune activities who link to a single object + defp prune_orphaned_activities_singles(limit) do %{:num_rows => del_single} = """ delete from public.activities @@ -63,14 +50,16 @@ def prune_orphaned_activities(limit \\ 0) when is_number(limit) do and o.id is null and a2.id is null and u.id is null - #{limit_arg} + #{limit_statement(limit)} ) """ |> Repo.query!([], timeout: :infinity) Logger.info("Prune activity singles: deleted #{del_single} rows...") + del_single + end - # Prune activities who link to an array of objects + defp prune_orphaned_activities_array(limit) do %{:num_rows => del_array} = """ delete from public.activities @@ -85,12 +74,44 @@ def prune_orphaned_activities(limit \\ 0) when is_number(limit) do having max(o.data ->> 'id') is null and max(a2.data ->> 'id') is null and max(u.ap_id) is null - #{limit_arg} + #{limit_statement(limit)} ) """ |> Repo.query!([], timeout: :infinity) Logger.info("Prune activity arrays: deleted #{del_array} rows...") + del_array + end + + def prune_orphaned_activities(limit \\ 0, opts \\ []) when is_number(limit) do + # Activities can either refer to a single object id, and array of object ids + # or contain an inlined object (at least after going through our normalisation) + # + # Flag is the only type we support with an array (and always has arrays). + # Update the only one with inlined objects. + # + # We already regularly purge old Delete, Undo, Update and Remove and if + # rejected Follow requests anyway; no need to explicitly deal with those here. + # + # Since there’s an index on types and there are typically only few Flag + # activites, it’s _much_ faster to utilise the index. To avoid accidentally + # deleting useful activities should more types be added, keep typeof for singles. 
+ + # Prune activities who link to a single object + del_single = + if Keyword.get(opts, :singles, true) do + prune_orphaned_activities_singles(limit) + else + 0 + end + + # Prune activities who link to an array of objects + del_array = + if Keyword.get(opts, :arrays, true) do + prune_orphaned_activities_array(limit) + else + 0 + end del_single + del_array end @@ -142,13 +163,15 @@ def run(["prune_orphaned_activities" | args]) do OptionParser.parse( args, strict: [ - limit: :integer + limit: :integer, + singles: :boolean, + arrays: :boolean ] ) start_pleroma() - limit = Keyword.get(options, :limit, 0) + {limit, options} = Keyword.pop(options, :limit, 0) log_message = "Pruning orphaned activities" @@ -161,7 +184,7 @@ def run(["prune_orphaned_activities" | args]) do Logger.info(log_message) - deleted = prune_orphaned_activities(limit) + deleted = prune_orphaned_activities(limit, options) Logger.info("Deleted #{deleted} rows") end