diff --git a/docs/docs/administration/CLI_tasks/database.md b/docs/docs/administration/CLI_tasks/database.md index bbf29fc60..580c9d32b 100644 --- a/docs/docs/administration/CLI_tasks/database.md +++ b/docs/docs/administration/CLI_tasks/database.md @@ -80,6 +80,8 @@ when all orphaned activities have been deleted. ### Options - `--limit n` - Only delete up to `n` activities in each query making up this job, i.e. if this job runs two queries at most `2n` activities will be deleted. Running this task repeatedly in limited batches can help maintain the instance’s responsiveness while still freeing up some space. +- `--no-singles` - Do not delete activities referencing single objects +- `--no-arrays` - Do not delete activities referencing an array of objects ## Create a conversation for all existing DMs diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index 218a4f297..bd545d617 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -20,7 +20,7 @@ defmodule Mix.Tasks.Pleroma.Database do @shortdoc "A collection of database related tasks" @moduledoc File.read!("docs/docs/administration/CLI_tasks/database.md") - def maybe_limit(query, limit_cnt) do + defp maybe_limit(query, limit_cnt) do if is_number(limit_cnt) and limit_cnt > 0 do limit(query, [], ^limit_cnt) else @@ -28,16 +28,17 @@ defmodule Mix.Tasks.Pleroma.Database do end end - def prune_orphaned_activities(limit \\ 0) when is_number(limit) do - limit_arg = - if limit > 0 do - "LIMIT #{limit}" - else - "" - end + defp limit_statement(limit) when is_number(limit) do + if limit > 0 do + "LIMIT #{limit}" + else + "" + end + end - # Prune activities who link to a single object - {:ok, %{:num_rows => del_single}} = + + defp prune_orphaned_activities_singles(limit) do + %{:num_rows => del_single} = """ delete from public.activities where id in ( @@ -50,18 +51,23 @@ defmodule Mix.Tasks.Pleroma.Database do and o.id is null and a2.id is null and u.id is null - 
#{limit_arg} + #{limit_statement(limit)} ) """ - |> Repo.query([], timeout: :infinity) + |> Repo.query!([], timeout: :infinity) - # Prune activities who link to an array of objects - {:ok, %{:num_rows => del_array}} = + Logger.info("Prune activity singles: deteleted #{del_single} rows...") + del_single + end + + defp prune_orphaned_activities_array(limit) do + %{:num_rows => del_array} = """ delete from public.activities where id in ( select a.id from public.activities a - join json_array_elements_text((a."data" -> 'object')::json) as j on jsonb_typeof(a."data" -> 'object') = 'array' + join json_array_elements_text((a."data" -> 'object')::json) as j + on a.data->>'type' = 'Flag' left join public.objects o on j.value = o.data ->> 'id' left join public.activities a2 on j.value = a2.data ->> 'id' left join public.users u on j.value = u.ap_id @@ -69,10 +75,44 @@ defmodule Mix.Tasks.Pleroma.Database do having max(o.data ->> 'id') is null and max(a2.data ->> 'id') is null and max(u.ap_id) is null - #{limit_arg} + #{limit_statement(limit)} ) """ - |> Repo.query([], timeout: :infinity) + |> Repo.query!([], timeout: :infinity) + + Logger.info("Prune activity arrays: deteleted #{del_array} rows...") + del_array + end + + def prune_orphaned_activities(limit \\ 0, opts \\ []) when is_number(limit) do + # Activities can either refer to a single object id, an array of object ids + # or contain an inlined object (at least after going through our normalisation) + # + # Flag is the only type we support with an array (and always has arrays). + # Update is the only one with inlined objects, but old Update activities are purged anyway (see below). + # + # We already regularly purge old Delete, Undo, Update and Remove activities as well as + # rejected Follow requests anyway; no need to explicitly deal with those here. + # + # Since there’s an index on types and there are typically only few Flag + # activities, it’s _much_ faster to utilise the index. 
To avoid accidentally + # deleting useful activities should more types be added, keep typeof for singles. + + # Prune activities who link to an array of objects + del_array = + if Keyword.get(opts, :arrays, true) do + prune_orphaned_activities_array(limit) + else + 0 + end + + # Prune activities who link to a single object + del_single = + if Keyword.get(opts, :singles, true) do + prune_orphaned_activities_singles(limit) + else + 0 + end del_single + del_array end @@ -124,13 +164,15 @@ defmodule Mix.Tasks.Pleroma.Database do OptionParser.parse( args, strict: [ - limit: :integer + limit: :integer, + singles: :boolean, + arrays: :boolean, ] ) start_pleroma() - limit = Keyword.get(options, :limit, 0) + {limit, options} = Keyword.pop(options, :limit, 0) log_message = "Pruning orphaned activities" @@ -143,7 +185,7 @@ defmodule Mix.Tasks.Pleroma.Database do Logger.info(log_message) - deleted = prune_orphaned_activities(limit) + deleted = prune_orphaned_activities(limit, options) Logger.info("Deleted #{deleted} rows") end @@ -208,102 +250,115 @@ defmodule Mix.Tasks.Pleroma.Database do Logger.info(log_message) - if Keyword.get(options, :keep_threads) do - # We want to delete objects from threads where - # 1. the newest post is still old - # 2. none of the activities is local - # 3. none of the activities is bookmarked - # 4. optionally none of the posts is non-public - deletable_context = - if Keyword.get(options, :keep_non_public) do - Pleroma.Activity - |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id) - |> group_by([a], fragment("? ->> 'context'::text", a.data)) - |> having( - [a], - not fragment( - # Posts (checked on Create Activity) is non-public - "bool_or((not(?->'to' \\? ? OR ?->'cc' \\? ?)) and ? ->> 'type' = 'Create')", - a.data, - ^Pleroma.Constants.as_public(), - a.data, - ^Pleroma.Constants.as_public(), - a.data + {del_obj, _} = + if Keyword.get(options, :keep_threads) do + # We want to delete objects from threads where + # 1. 
the newest post is still old + # 2. none of the activities is local + # 3. none of the activities is bookmarked + # 4. optionally none of the posts is non-public + deletable_context = + if Keyword.get(options, :keep_non_public) do + Pleroma.Activity + |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id) + |> group_by([a], fragment("? ->> 'context'::text", a.data)) + |> having( + [a], + not fragment( + # Posts (checked on Create Activity) is non-public + "bool_or((not(?->'to' \\? ? OR ?->'cc' \\? ?)) and ? ->> 'type' = 'Create')", + a.data, + ^Pleroma.Constants.as_public(), + a.data, + ^Pleroma.Constants.as_public(), + a.data + ) ) - ) - else - Pleroma.Activity - |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id) - |> group_by([a], fragment("? ->> 'context'::text", a.data)) - end - |> having([a], max(a.updated_at) < ^time_deadline) - |> having([a], not fragment("bool_or(?)", a.local)) - |> having([_, b], fragment("max(?::text) is null", b.id)) - |> maybe_limit(limit_cnt) - |> select([a], fragment("? ->> 'context'::text", a.data)) + else + Pleroma.Activity + |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id) + |> group_by([a], fragment("? ->> 'context'::text", a.data)) + end + |> having([a], max(a.updated_at) < ^time_deadline) + |> having([a], not fragment("bool_or(?)", a.local)) + |> having([_, b], fragment("max(?::text) is null", b.id)) + |> maybe_limit(limit_cnt) + |> select([a], fragment("? ->> 'context'::text", a.data)) - Pleroma.Object - |> where([o], fragment("? ->> 'context'::text", o.data) in subquery(deletable_context)) - else - deletable = - if Keyword.get(options, :keep_non_public) do - Pleroma.Object + Pleroma.Object + |> where([o], fragment("? ->> 'context'::text", o.data) in subquery(deletable_context)) + else + deletable = + if Keyword.get(options, :keep_non_public) do + Pleroma.Object + |> where( + [o], + fragment( + "?->'to' \\? ? OR ?->'cc' \\? 
?", + o.data, + ^Pleroma.Constants.as_public(), + o.data, + ^Pleroma.Constants.as_public() + ) + ) + else + Pleroma.Object + end + |> where([o], o.updated_at < ^time_deadline) |> where( [o], - fragment( - "?->'to' \\? ? OR ?->'cc' \\? ?", - o.data, - ^Pleroma.Constants.as_public(), - o.data, - ^Pleroma.Constants.as_public() - ) + fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host()) ) - else - Pleroma.Object - end - |> where([o], o.updated_at < ^time_deadline) - |> where( - [o], - fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host()) - ) - |> maybe_limit(limit_cnt) - |> select([o], o.id) + |> maybe_limit(limit_cnt) + |> select([o], o.id) - Pleroma.Object - |> where([o], o.id in subquery(deletable)) - end - |> Repo.delete_all(timeout: :infinity) + Pleroma.Object + |> where([o], o.id in subquery(deletable)) + end + |> Repo.delete_all(timeout: :infinity) + + Logger.info("Deleted #{del_obj} objects...") if !Keyword.get(options, :keep_threads) do # Without the --keep-threads option, it's possible that bookmarked # objects have been deleted. We remove the corresponding bookmarks. 
- """ - delete from public.bookmarks - where id in ( - select b.id from public.bookmarks b - left join public.activities a on b.activity_id = a.id - left join public.objects o on a."data" ->> 'object' = o.data ->> 'id' - where o.id is null - ) - """ - |> Repo.query([], timeout: :infinity) + %{:num_rows => del_bookmarks} = + """ + delete from public.bookmarks + where id in ( + select b.id from public.bookmarks b + left join public.activities a on b.activity_id = a.id + left join public.objects o on a."data" ->> 'object' = o.data ->> 'id' + where o.id is null + ) + """ + |> Repo.query!([], timeout: :infinity) + + Logger.info("Deleted #{del_bookmarks} orphaned bookmarks...") end if Keyword.get(options, :prune_orphaned_activities) do - prune_orphaned_activities() + del_activities = prune_orphaned_activities() + Logger.info("Deleted #{del_activities} orphaned activities...") end - """ - DELETE FROM hashtags AS ht - WHERE NOT EXISTS ( - SELECT 1 FROM hashtags_objects hto - WHERE ht.id = hto.hashtag_id) - """ - |> Repo.query() + %{:num_rows => del_hashtags} = + """ + DELETE FROM hashtags AS ht + WHERE NOT EXISTS ( + SELECT 1 FROM hashtags_objects hto + WHERE ht.id = hto.hashtag_id) + """ + |> Repo.query!() + + Logger.info("Deleted #{del_hashtags} no longer used hashtags...") if Keyword.get(options, :vacuum) do + Logger.info("Starting vacuum...") Maintenance.vacuum("full") end + + Logger.info("All done!") end def run(["prune_task"]) do diff --git a/test/mix/tasks/pleroma/database_test.exs b/test/mix/tasks/pleroma/database_test.exs index cbb40f3e8..c9163e42f 100644 --- a/test/mix/tasks/pleroma/database_test.exs +++ b/test/mix/tasks/pleroma/database_test.exs @@ -478,6 +478,7 @@ defmodule Mix.Tasks.Pleroma.DatabaseTest do |> Map.merge(%{ local: false, data: %{ + "type" => "Flag", "id" => "remote_activity_existing_object", "object" => ["non_ existing_object", "existing_object"] } @@ -488,6 +489,7 @@ defmodule Mix.Tasks.Pleroma.DatabaseTest do |> Map.merge(%{ local: false, 
data: %{ + "type" => "Flag", "id" => "remote_activity_existing_actor", "object" => ["non_ existing_object", "existing_actor"] } @@ -498,6 +500,7 @@ defmodule Mix.Tasks.Pleroma.DatabaseTest do |> Map.merge(%{ local: false, data: %{ + "type" => "Flag", "id" => "remote_activity_existing_activity", "object" => ["non_ existing_object", "remote_activity_existing_actor"] } @@ -508,6 +511,7 @@ defmodule Mix.Tasks.Pleroma.DatabaseTest do |> Map.merge(%{ local: false, data: %{ + "type" => "Flag", "id" => "remote_activity_without_existing_referenced_object", "object" => ["owo", "whats_this"] }