diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index be59e2271..0f428ca03 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -69,7 +69,8 @@ defmodule Mix.Tasks.Pleroma.Database do strict: [ vacuum: :boolean, keep_threads: :boolean, - keep_non_public: :boolean + keep_non_public: :boolean, + prune_orphaned_activities: :boolean ] ) @@ -94,6 +95,21 @@ defmodule Mix.Tasks.Pleroma.Database do log_message end + log_message = + if Keyword.get(options, :prune_orphaned_activities) do + log_message <> ", pruning orphaned activities" + else + log_message + end + + log_message = + if Keyword.get(options, :vacuum) do + log_message <> + ", doing a full vacuum (you shouldn't do this as a recurring maintanance task)" + else + log_message + end + Logger.info(log_message) if Keyword.get(options, :keep_threads) do @@ -155,6 +171,28 @@ defmodule Mix.Tasks.Pleroma.Database do end |> Repo.delete_all(timeout: :infinity) + if Keyword.get(options, :prune_orphaned_activities) do + """ + delete from public.activities + where id in ( + select a.id from public.activities a + left join public.objects o on a.data ->> 'object' = o.data ->> 'id' + left join public.activities a2 on a.data ->> 'object' = a2.data ->> 'id' + left join public.users u on a.data ->> 'object' = u.ap_id + -- Only clean up remote activities + where not a.local + -- For now we only focus on activities with direct links to objects + -- e.g. not json objects (in case of embedded objects) or json arrays (in case of multiple objects) + and jsonb_typeof(a."data" -> 'object') = 'string' + -- Find Activities that don't have existing objects + and o.id is null + and a2.id is null + and u.id is null + ) + """ + |> Repo.query() + end + prune_hashtags_query = """ DELETE FROM hashtags AS ht WHERE NOT EXISTS ( diff --git a/test/mix/tasks/pleroma/database_test.exs b/test/mix/tasks/pleroma/database_test.exs index 447a4404e..7f5cd91a9 100644 --- a/test/mix/tasks/pleroma/database_test.exs +++ b/test/mix/tasks/pleroma/database_test.exs @@ -353,6 +353,134 @@ defmodule Mix.Tasks.Pleroma.DatabaseTest do assert length(Repo.all(Object)) == 1 end + + test "We don't have unexpected tables which can contain objects that are referenced by activities" do + # We can delete orphaned activities. For that we look for the objects they reference in the 'objects', 'activities', and 'users' table. + # If someone adds another table with objects (idk, maybe with separate relations, or collections or w/e), then we need to make sure we + # add logic for that in the 'prune_objects' task so that we don't wrongly delete their corresponding activities. + # So when someone adds (or removes) a table, this test will fail. + # Either the table contains objects which can be referenced from the activities table + # => in that case the prune_objects job should be adapted so we don't delete activities who still have the referenced object. + # Or it doesn't contain objects which can be referenced from the activities table + # => in that case you can add/remove the table to/from this (sorted) list. + + assert Repo.query!( + "SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';" + ).rows + |> Enum.sort() == [ + ["activities"], + ["announcement_read_relationships"], + ["announcements"], + ["apps"], + ["backups"], + ["bookmarks"], + ["chat_message_references"], + ["chats"], + ["config"], + ["conversation_participation_recipient_ships"], + ["conversation_participations"], + ["conversations"], + ["counter_cache"], + ["data_migration_failed_ids"], + ["data_migrations"], + ["deliveries"], + ["filters"], + ["following_relationships"], + ["hashtags"], + ["hashtags_objects"], + ["instances"], + ["lists"], + ["markers"], + ["mfa_tokens"], + ["moderation_log"], + ["notifications"], + ["oauth_authorizations"], + ["oauth_tokens"], + ["oban_jobs"], + ["oban_peers"], + ["objects"], + ["password_reset_tokens"], + ["push_subscriptions"], + ["registrations"], + ["report_notes"], + ["scheduled_activities"], + ["schema_migrations"], + ["thread_mutes"], + ["user_follows_hashtag"], + ["user_frontend_setting_profiles"], + ["user_invite_tokens"], + ["user_notes"], + ["user_relationships"], + ["users"] + ] + end + + test "it prunes orphaned activities with the --prune-orphaned-activities" do + # Add a remote activity which references an Object + %Object{} |> Map.merge(%{data: %{"id" => "object_for_activity"}}) |> Repo.insert() + + %Activity{} + |> Map.merge(%{ + local: false, + data: %{"id" => "remote_activity_with_object", "object" => "object_for_activity"} + }) + |> Repo.insert() + + # Add a remote activity which references an activity + %Activity{} + |> Map.merge(%{ + local: false, + data: %{ + "id" => "remote_activity_with_activity", + "object" => "remote_activity_with_object" + } + }) + |> Repo.insert() + + # Add a remote activity which references an Actor + %User{} |> Map.merge(%{ap_id: "actor"}) |> Repo.insert() + + %Activity{} + |> Map.merge(%{ + local: false, + data: %{"id" => "remote_activity_with_actor", "object" => "actor"} + }) + |> Repo.insert() + + # Add a remote activity without existing referenced object, activity or actor + %Activity{} + |> Map.merge(%{ + local: false, + data: %{ + "id" => "remote_activity_without_existing_referenced_object", + "object" => "non_existing" + } + }) + |> Repo.insert() + + # Add a local activity without existing referenced object, activity or actor + %Activity{} + |> Map.merge(%{ + local: true, + data: %{"id" => "local_activity_with_actor", "object" => "non_existing"} + }) + |> Repo.insert() + + # The remote activities without existing reference, and only the remote activities without existing reference, are deleted + # if, and only if, we provide the --prune-orphaned-activities option + assert length(Repo.all(Activity)) == 5 + Mix.Tasks.Pleroma.Database.run(["prune_objects"]) + assert length(Repo.all(Activity)) == 5 + Mix.Tasks.Pleroma.Database.run(["prune_objects", "--prune-orphaned-activities"]) + activities = Repo.all(Activity) + + assert "remote_activity_without_existing_referenced_object" not in Enum.map( + activities, + fn a -> a.data["id"] end + ) + + assert length(activities) == 4 + end end describe "running update_users_following_followers_counts" do