diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex
index 0f428ca03..726a22d41 100644
--- a/lib/mix/tasks/pleroma/database.ex
+++ b/lib/mix/tasks/pleroma/database.ex
@@ -172,35 +172,51 @@ defmodule Mix.Tasks.Pleroma.Database do
     |> Repo.delete_all(timeout: :infinity)
 
     if Keyword.get(options, :prune_orphaned_activities) do
+      # Prune activities that link to a single object
       """
       delete from public.activities
       where id in (
-          select a.id from public.activities a
-          left join public.objects o on a.data ->> 'object' = o.data ->> 'id'
-          left join public.activities a2 on a.data ->> 'object' = a2.data ->> 'id'
-          left join public.users u on a.data ->> 'object' = u.ap_id
-          -- Only clean up remote activities
-          where not a.local
-          -- For now we only focus on activities with direct links to objects
-          -- e.g. not json objects (in case of embedded objects) or json arrays (in case of multiple objects)
-          and jsonb_typeof(a."data" -> 'object') = 'string'
-          -- Find Activities that don't have existing objects
-          and o.id is null
-          and a2.id is null
-          and u.id is null
+        select a.id from public.activities a
+        left join public.objects o on a.data ->> 'object' = o.data ->> 'id'
+        left join public.activities a2 on a.data ->> 'object' = a2.data ->> 'id'
+        left join public.users u on a.data ->> 'object' = u.ap_id
+        where not a.local
+        and jsonb_typeof(a."data" -> 'object') = 'string'
+        and o.id is null
+        and a2.id is null
+        and u.id is null
       )
       """
-      |> Repo.query()
+      |> Repo.query([], timeout: :infinity)
+
+      # Prune activities that link to an array of objects
+      """
+      delete from public.activities
+      where id in (
+        select a.id from public.activities a
+        join json_array_elements_text((a."data" -> 'object')::json) as j on jsonb_typeof(a."data" -> 'object') = 'array'
+        left join public.objects o on j.value = o.data ->> 'id'
+        left join public.activities a2 on j.value = a2.data ->> 'id'
+        left join public.users u on j.value = u.ap_id
+        -- Only clean up remote activities, like the single-object query does
+        where not a.local
+        group by a.id
+        -- Prune only when none of the referenced ids matches an object, activity or user
+        having max(o.data ->> 'id') is null
+        and max(a2.data ->> 'id') is null
+        and max(u.ap_id) is null
+      )
+      """
+      |> Repo.query([], timeout: :infinity)
     end
 
-    prune_hashtags_query = """
+    """
     DELETE FROM hashtags AS ht
     WHERE NOT EXISTS (
       SELECT 1 FROM hashtags_objects hto
       WHERE ht.id = hto.hashtag_id)
     """
-
-    Repo.query(prune_hashtags_query)
+    |> Repo.query()
 
     if Keyword.get(options, :vacuum) do
       Maintenance.vacuum("full")
diff --git a/test/mix/tasks/pleroma/database_test.exs b/test/mix/tasks/pleroma/database_test.exs
index 7f5cd91a9..402856f3d 100644
--- a/test/mix/tasks/pleroma/database_test.exs
+++ b/test/mix/tasks/pleroma/database_test.exs
@@ -354,7 +354,7 @@ defmodule Mix.Tasks.Pleroma.DatabaseTest do
       assert length(Repo.all(Object)) == 1
     end
 
-    test "We don't have unexpected tables which can contain objects that are referenced by activities" do
+    test "We don't have unexpected tables which may contain objects that are referenced by activities" do
       # We can delete orphaned activities. For that we look for the objects they reference in the 'objects', 'activities', and 'users' table.
       # If someone adds another table with objects (idk, maybe with separate relations, or collections or w/e), then we need to make sure we
       # add logic for that in the 'prune_objects' task so that we don't wrongly delete their corresponding activities.
@@ -481,6 +481,71 @@ defmodule Mix.Tasks.Pleroma.DatabaseTest do
 
       assert length(activities) == 4
     end
+
+    test "it prunes orphaned activities with the --prune-orphaned-activities when the objects are referenced from an array" do
+      # An activity whose "object" is an array is only orphaned when none of the
+      # referenced ids resolves to an object, an activity or a user.
+      %Object{} |> Map.merge(%{data: %{"id" => "existing_object"}}) |> Repo.insert()
+      %User{} |> Map.merge(%{ap_id: "existing_actor"}) |> Repo.insert()
+
+      # Multiple objects, one object exists (keep)
+      %Activity{}
+      |> Map.merge(%{
+        local: false,
+        data: %{
+          "id" => "remote_activity_existing_object",
+          "object" => ["non_ existing_object", "existing_object"]
+        }
+      })
+      |> Repo.insert()
+
+      # Multiple objects, one actor exists (keep)
+      %Activity{}
+      |> Map.merge(%{
+        local: false,
+        data: %{
+          "id" => "remote_activity_existing_actor",
+          "object" => ["non_ existing_object", "existing_actor"]
+        }
+      })
+      |> Repo.insert()
+
+      # Multiple objects, one activity exists (keep)
+      %Activity{}
+      |> Map.merge(%{
+        local: false,
+        data: %{
+          "id" => "remote_activity_existing_activity",
+          "object" => ["non_ existing_object", "remote_activity_existing_actor"]
+        }
+      })
+      |> Repo.insert()
+
+      # Multiple objects none exist (prune)
+      %Activity{}
+      |> Map.merge(%{
+        local: false,
+        data: %{
+          "id" => "remote_activity_without_existing_referenced_object",
+          "object" => ["owo", "whats_this"]
+        }
+      })
+      |> Repo.insert()
+
+      assert length(Repo.all(Activity)) == 4
+      Mix.Tasks.Pleroma.Database.run(["prune_objects"])
+      assert length(Repo.all(Activity)) == 4
+      Mix.Tasks.Pleroma.Database.run(["prune_objects", "--prune-orphaned-activities"])
+
+      remaining_ids = Activity |> Repo.all() |> Enum.map(& &1.data["id"]) |> Enum.sort()
+
+      # Only the activity without any resolvable reference is gone; the other three survive.
+      assert remaining_ids == [
+               "remote_activity_existing_activity",
+               "remote_activity_existing_actor",
+               "remote_activity_existing_object"
+             ]
+    end
   end
 
   describe "running update_users_following_followers_counts" do