Merge remote-tracking branch 'oneric/prune-batch' into akko.wtf

This commit is contained in:
Norm 2024-05-14 22:56:23 -04:00
commit f5a8f7ba2d
3 changed files with 158 additions and 97 deletions

View file

@ -80,6 +80,8 @@ when all orphaned activities have been deleted.
### Options
- `--limit n` - Only delete up to `n` activities in each query making up this job, i.e. if this job runs two queries at most `2n` activities will be deleted. Running this task repeatedly in limited batches can help maintain the instances responsiveness while still freeing up some space.
- `--no-singles` - Do not delete activites referencing single objects
- `--no-arrays` - Do not delete activites referencing an array of objects
## Create a conversation for all existing DMs

View file

@ -20,7 +20,7 @@ defmodule Mix.Tasks.Pleroma.Database do
@shortdoc "A collection of database related tasks"
@moduledoc File.read!("docs/docs/administration/CLI_tasks/database.md")
def maybe_limit(query, limit_cnt) do
defp maybe_limit(query, limit_cnt) do
if is_number(limit_cnt) and limit_cnt > 0 do
limit(query, [], ^limit_cnt)
else
@ -28,16 +28,17 @@ def maybe_limit(query, limit_cnt) do
end
end
def prune_orphaned_activities(limit \\ 0) when is_number(limit) do
limit_arg =
if limit > 0 do
"LIMIT #{limit}"
else
""
end
defp limit_statement(limit) when is_number(limit) do
if limit > 0 do
"LIMIT #{limit}"
else
""
end
end
# Prune activities who link to a single object
{:ok, %{:num_rows => del_single}} =
defp prune_orphaned_activities_singles(limit) do
%{:num_rows => del_single} =
"""
delete from public.activities
where id in (
@ -50,18 +51,23 @@ def prune_orphaned_activities(limit \\ 0) when is_number(limit) do
and o.id is null
and a2.id is null
and u.id is null
#{limit_arg}
#{limit_statement(limit)}
)
"""
|> Repo.query([], timeout: :infinity)
|> Repo.query!([], timeout: :infinity)
# Prune activities who link to an array of objects
{:ok, %{:num_rows => del_array}} =
Logger.info("Prune activity singles: deteleted #{del_single} rows...")
del_single
end
defp prune_orphaned_activities_array(limit) do
%{:num_rows => del_array} =
"""
delete from public.activities
where id in (
select a.id from public.activities a
join json_array_elements_text((a."data" -> 'object')::json) as j on jsonb_typeof(a."data" -> 'object') = 'array'
join json_array_elements_text((a."data" -> 'object')::json) as j
on a.data->>'type' = 'Flag'
left join public.objects o on j.value = o.data ->> 'id'
left join public.activities a2 on j.value = a2.data ->> 'id'
left join public.users u on j.value = u.ap_id
@ -69,10 +75,44 @@ def prune_orphaned_activities(limit \\ 0) when is_number(limit) do
having max(o.data ->> 'id') is null
and max(a2.data ->> 'id') is null
and max(u.ap_id) is null
#{limit_arg}
#{limit_statement(limit)}
)
"""
|> Repo.query([], timeout: :infinity)
|> Repo.query!([], timeout: :infinity)
Logger.info("Prune activity arrays: deteleted #{del_array} rows...")
del_array
end
def prune_orphaned_activities(limit \\ 0, opts \\ []) when is_number(limit) do
# Activities can either refer to a single object id, and array of object ids
# or contain an inlined object (at least after going through our normalisation)
#
# Flag is the only type we support with an array (and always has arrays).
# Update the only one with inlined objects, but old Update activities are
#
# We already regularly purge old Delte, Undo, Update and Remove and if
# rejected Follow requests anyway; no need to explicitly deal with those here.
#
# Since theres an index on types and there are typically only few Flag
# activites, its _much_ faster to utilise the index. To avoid accidentally
# deleting useful activities should more types be added, keep typeof for singles.
# Prune activities who link to an array of objects
del_array =
if Keyword.get(opts, :arrays, true) do
prune_orphaned_activities_array(limit)
else
0
end
# Prune activities who link to a single object
del_single =
if Keyword.get(opts, :singles, true) do
prune_orphaned_activities_singles(limit)
else
0
end
del_single + del_array
end
@ -124,13 +164,15 @@ def run(["prune_orphaned_activities" | args]) do
OptionParser.parse(
args,
strict: [
limit: :integer
limit: :integer,
singles: :boolean,
arrays: :boolean,
]
)
start_pleroma()
limit = Keyword.get(options, :limit, 0)
{limit, options} = Keyword.pop(options, :limit, 0)
log_message = "Pruning orphaned activities"
@ -143,7 +185,7 @@ def run(["prune_orphaned_activities" | args]) do
Logger.info(log_message)
deleted = prune_orphaned_activities(limit)
deleted = prune_orphaned_activities(limit, options)
Logger.info("Deleted #{deleted} rows")
end
@ -208,102 +250,115 @@ def run(["prune_objects" | args]) do
Logger.info(log_message)
if Keyword.get(options, :keep_threads) do
# We want to delete objects from threads where
# 1. the newest post is still old
# 2. none of the activities is local
# 3. none of the activities is bookmarked
# 4. optionally none of the posts is non-public
deletable_context =
if Keyword.get(options, :keep_non_public) do
Pleroma.Activity
|> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id)
|> group_by([a], fragment("? ->> 'context'::text", a.data))
|> having(
[a],
not fragment(
# Posts (checked on Create Activity) is non-public
"bool_or((not(?->'to' \\? ? OR ?->'cc' \\? ?)) and ? ->> 'type' = 'Create')",
a.data,
^Pleroma.Constants.as_public(),
a.data,
^Pleroma.Constants.as_public(),
a.data
{del_obj, _} =
if Keyword.get(options, :keep_threads) do
# We want to delete objects from threads where
# 1. the newest post is still old
# 2. none of the activities is local
# 3. none of the activities is bookmarked
# 4. optionally none of the posts is non-public
deletable_context =
if Keyword.get(options, :keep_non_public) do
Pleroma.Activity
|> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id)
|> group_by([a], fragment("? ->> 'context'::text", a.data))
|> having(
[a],
not fragment(
# Posts (checked on Create Activity) is non-public
"bool_or((not(?->'to' \\? ? OR ?->'cc' \\? ?)) and ? ->> 'type' = 'Create')",
a.data,
^Pleroma.Constants.as_public(),
a.data,
^Pleroma.Constants.as_public(),
a.data
)
)
)
else
Pleroma.Activity
|> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id)
|> group_by([a], fragment("? ->> 'context'::text", a.data))
end
|> having([a], max(a.updated_at) < ^time_deadline)
|> having([a], not fragment("bool_or(?)", a.local))
|> having([_, b], fragment("max(?::text) is null", b.id))
|> maybe_limit(limit_cnt)
|> select([a], fragment("? ->> 'context'::text", a.data))
else
Pleroma.Activity
|> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id)
|> group_by([a], fragment("? ->> 'context'::text", a.data))
end
|> having([a], max(a.updated_at) < ^time_deadline)
|> having([a], not fragment("bool_or(?)", a.local))
|> having([_, b], fragment("max(?::text) is null", b.id))
|> maybe_limit(limit_cnt)
|> select([a], fragment("? ->> 'context'::text", a.data))
Pleroma.Object
|> where([o], fragment("? ->> 'context'::text", o.data) in subquery(deletable_context))
else
deletable =
if Keyword.get(options, :keep_non_public) do
Pleroma.Object
Pleroma.Object
|> where([o], fragment("? ->> 'context'::text", o.data) in subquery(deletable_context))
else
deletable =
if Keyword.get(options, :keep_non_public) do
Pleroma.Object
|> where(
[o],
fragment(
"?->'to' \\? ? OR ?->'cc' \\? ?",
o.data,
^Pleroma.Constants.as_public(),
o.data,
^Pleroma.Constants.as_public()
)
)
else
Pleroma.Object
end
|> where([o], o.updated_at < ^time_deadline)
|> where(
[o],
fragment(
"?->'to' \\? ? OR ?->'cc' \\? ?",
o.data,
^Pleroma.Constants.as_public(),
o.data,
^Pleroma.Constants.as_public()
)
fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host())
)
else
Pleroma.Object
end
|> where([o], o.updated_at < ^time_deadline)
|> where(
[o],
fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host())
)
|> maybe_limit(limit_cnt)
|> select([o], o.id)
|> maybe_limit(limit_cnt)
|> select([o], o.id)
Pleroma.Object
|> where([o], o.id in subquery(deletable))
end
|> Repo.delete_all(timeout: :infinity)
Pleroma.Object
|> where([o], o.id in subquery(deletable))
end
|> Repo.delete_all(timeout: :infinity)
Logger.info("Deleted #{del_obj} objects...")
if !Keyword.get(options, :keep_threads) do
# Without the --keep-threads option, it's possible that bookmarked
# objects have been deleted. We remove the corresponding bookmarks.
"""
delete from public.bookmarks
where id in (
select b.id from public.bookmarks b
left join public.activities a on b.activity_id = a.id
left join public.objects o on a."data" ->> 'object' = o.data ->> 'id'
where o.id is null
)
"""
|> Repo.query([], timeout: :infinity)
%{:num_rows => del_bookmarks} =
"""
delete from public.bookmarks
where id in (
select b.id from public.bookmarks b
left join public.activities a on b.activity_id = a.id
left join public.objects o on a."data" ->> 'object' = o.data ->> 'id'
where o.id is null
)
"""
|> Repo.query!([], timeout: :infinity)
Logger.info("Deleted #{del_bookmarks} orphaned bookmarks...")
end
if Keyword.get(options, :prune_orphaned_activities) do
prune_orphaned_activities()
del_activities = prune_orphaned_activities()
Logger.info("Deleted #{del_activities} orphaned activities...")
end
"""
DELETE FROM hashtags AS ht
WHERE NOT EXISTS (
SELECT 1 FROM hashtags_objects hto
WHERE ht.id = hto.hashtag_id)
"""
|> Repo.query()
%{:num_rows => del_hashtags} =
"""
DELETE FROM hashtags AS ht
WHERE NOT EXISTS (
SELECT 1 FROM hashtags_objects hto
WHERE ht.id = hto.hashtag_id)
"""
|> Repo.query!()
Logger.info("Deleted #{del_hashtags} no longer used hashtags...")
if Keyword.get(options, :vacuum) do
Logger.info("Starting vacuum...")
Maintenance.vacuum("full")
end
Logger.info("All done!")
end
def run(["prune_task"]) do

View file

@ -478,6 +478,7 @@ test "it prunes orphaned activities with prune_orphaned_activities when the obje
|> Map.merge(%{
local: false,
data: %{
"type" => "Flag",
"id" => "remote_activity_existing_object",
"object" => ["non_ existing_object", "existing_object"]
}
@ -488,6 +489,7 @@ test "it prunes orphaned activities with prune_orphaned_activities when the obje
|> Map.merge(%{
local: false,
data: %{
"type" => "Flag",
"id" => "remote_activity_existing_actor",
"object" => ["non_ existing_object", "existing_actor"]
}
@ -498,6 +500,7 @@ test "it prunes orphaned activities with prune_orphaned_activities when the obje
|> Map.merge(%{
local: false,
data: %{
"type" => "Flag",
"id" => "remote_activity_existing_activity",
"object" => ["non_ existing_object", "remote_activity_existing_actor"]
}
@ -508,6 +511,7 @@ test "it prunes orphaned activities with prune_orphaned_activities when the obje
|> Map.merge(%{
local: false,
data: %{
"type" => "Flag",
"id" => "remote_activity_without_existing_referenced_object",
"object" => ["owo", "whats_this"]
}