dbprune: allow splitting array and single activity prunes

The former is typically just a few reports; it doesn't make sense to
rerun it over and over again in batched prunes or if a full prune OOMed.
This commit is contained in:
Oneric 2024-05-15 02:15:31 +02:00
parent 5751637926
commit aeaebb566c
2 changed files with 52 additions and 27 deletions

View file

@ -80,6 +80,8 @@ when all orphaned activities have been deleted.
### Options ### Options
- `--limit n` - Only delete up to `n` activities in each query making up this job, i.e. if this job runs two queries at most `2n` activities will be deleted. Running this task repeatedly in limited batches can help maintain the instance's responsiveness while still freeing up some space. - `--limit n` - Only delete up to `n` activities in each query making up this job, i.e. if this job runs two queries at most `2n` activities will be deleted. Running this task repeatedly in limited batches can help maintain the instance's responsiveness while still freeing up some space.
- `--no-singles` - Do not delete activities referencing single objects
- `--no-arrays` - Do not delete activities referencing an array of objects
## Create a conversation for all existing DMs ## Create a conversation for all existing DMs

View file

@ -28,28 +28,15 @@ defp maybe_limit(query, limit_cnt) do
end end
end end
def prune_orphaned_activities(limit \\ 0) when is_number(limit) do defp limit_statement(limit) when is_number(limit) do
limit_arg = if limit > 0 do
if limit > 0 do "LIMIT #{limit}"
"LIMIT #{limit}" else
else ""
"" end
end end
# Activities can either refer to a single object id, and array of object ids defp prune_orphaned_activities_singles(limit) do
# or contain an inlined object (at least after going through our normalisation)
#
# Flag is the only type we support with an array (and always has arrays).
# Update the only one with inlined objects.
#
# We already regularly purge old Delete, Undo, Update and Remove and if
# rejected Follow requests anyway; no need to explicitly deal with those here.
#
# Since theres an index on types and there are typically only few Flag
# activites, its _much_ faster to utilise the index. To avoid accidentally
# deleting useful activities should more types be added, keep typeof for singles.
# Prune activities who link to a single object
%{:num_rows => del_single} = %{:num_rows => del_single} =
""" """
delete from public.activities delete from public.activities
@ -63,14 +50,16 @@ def prune_orphaned_activities(limit \\ 0) when is_number(limit) do
and o.id is null and o.id is null
and a2.id is null and a2.id is null
and u.id is null and u.id is null
#{limit_arg} #{limit_statement(limit)}
) )
""" """
|> Repo.query!([], timeout: :infinity) |> Repo.query!([], timeout: :infinity)
Logger.info("Prune activity singles: deleted #{del_single} rows...") Logger.info("Prune activity singles: deleted #{del_single} rows...")
del_single
end
# Prune activities who link to an array of objects defp prune_orphaned_activities_array(limit) do
%{:num_rows => del_array} = %{:num_rows => del_array} =
""" """
delete from public.activities delete from public.activities
@ -85,12 +74,44 @@ def prune_orphaned_activities(limit \\ 0) when is_number(limit) do
having max(o.data ->> 'id') is null having max(o.data ->> 'id') is null
and max(a2.data ->> 'id') is null and max(a2.data ->> 'id') is null
and max(u.ap_id) is null and max(u.ap_id) is null
#{limit_arg} #{limit_statement(limit)}
) )
""" """
|> Repo.query!([], timeout: :infinity) |> Repo.query!([], timeout: :infinity)
Logger.info("Prune activity arrays: deleted #{del_array} rows...") Logger.info("Prune activity arrays: deleted #{del_array} rows...")
del_array
end
def prune_orphaned_activities(limit \\ 0, opts \\ []) when is_number(limit) do
# Activities can either refer to a single object id, an array of object ids
# or contain an inlined object (at least after going through our normalisation)
#
# Flag is the only type we support with an array (and always has arrays).
# Update the only one with inlined objects.
#
# We already regularly purge old Delete, Undo, Update and Remove activities as well
# as rejected Follow requests anyway; no need to explicitly deal with those here.
#
# Since there's an index on types and there are typically only a few Flag
# activities, it's _much_ faster to utilise the index. To avoid accidentally
# deleting useful activities should more types be added, keep typeof for singles.
# Prune activities that link to a single object
del_single =
if Keyword.get(opts, :singles, true) do
prune_orphaned_activities_singles(limit)
else
0
end
# Prune activities that link to an array of objects
del_array =
if Keyword.get(opts, :arrays, true) do
prune_orphaned_activities_array(limit)
else
0
end
del_single + del_array del_single + del_array
end end
@ -142,13 +163,15 @@ def run(["prune_orphaned_activities" | args]) do
OptionParser.parse( OptionParser.parse(
args, args,
strict: [ strict: [
limit: :integer limit: :integer,
singles: :boolean,
arrays: :boolean
] ]
) )
start_pleroma() start_pleroma()
limit = Keyword.get(options, :limit, 0) {limit, options} = Keyword.pop(options, :limit, 0)
log_message = "Pruning orphaned activities" log_message = "Pruning orphaned activities"
@ -161,7 +184,7 @@ def run(["prune_orphaned_activities" | args]) do
Logger.info(log_message) Logger.info(log_message)
deleted = prune_orphaned_activities(limit) deleted = prune_orphaned_activities(limit, options)
Logger.info("Deleted #{deleted} rows") Logger.info("Deleted #{deleted} rows")
end end