prune_objects can prune orphaned activities
We add an option to also prune remote activities whose referenced objects no longer exist. Right now, we only check activities that reference a single object, not an array or an embedded object.
This commit is contained in:
parent
9f34294332
commit
a7ec6e039c
2 changed files with 167 additions and 1 deletions
|
@ -69,7 +69,8 @@ def run(["prune_objects" | args]) do
|
|||
strict: [
|
||||
vacuum: :boolean,
|
||||
keep_threads: :boolean,
|
||||
keep_non_public: :boolean
|
||||
keep_non_public: :boolean,
|
||||
prune_orphaned_activities: :boolean
|
||||
]
|
||||
)
|
||||
|
||||
|
@ -94,6 +95,21 @@ def run(["prune_objects" | args]) do
|
|||
log_message
|
||||
end
|
||||
|
||||
log_message =
|
||||
if Keyword.get(options, :prune_orphaned_activities) do
|
||||
log_message <> ", pruning orphaned activities"
|
||||
else
|
||||
log_message
|
||||
end
|
||||
|
||||
log_message =
|
||||
if Keyword.get(options, :vacuum) do
|
||||
log_message <>
|
||||
", doing a full vacuum (you shouldn't do this as a recurring maintanance task)"
|
||||
else
|
||||
log_message
|
||||
end
|
||||
|
||||
Logger.info(log_message)
|
||||
|
||||
if Keyword.get(options, :keep_threads) do
|
||||
|
@ -155,6 +171,28 @@ def run(["prune_objects" | args]) do
|
|||
end
|
||||
|> Repo.delete_all(timeout: :infinity)
|
||||
|
||||
if Keyword.get(options, :prune_orphaned_activities) do
|
||||
"""
|
||||
delete from public.activities
|
||||
where id in (
|
||||
select a.id from public.activities a
|
||||
left join public.objects o on a.data ->> 'object' = o.data ->> 'id'
|
||||
left join public.activities a2 on a.data ->> 'object' = a2.data ->> 'id'
|
||||
left join public.users u on a.data ->> 'object' = u.ap_id
|
||||
-- Only clean up remote activities
|
||||
where not a.local
|
||||
-- For now we only focus on activities with direct links to objects
|
||||
-- e.g. not json objects (in case of embedded objects) or json arrays (in case of multiple objects)
|
||||
and jsonb_typeof(a."data" -> 'object') = 'string'
|
||||
-- Find Activities that don't have existing objects
|
||||
and o.id is null
|
||||
and a2.id is null
|
||||
and u.id is null
|
||||
)
|
||||
"""
|
||||
|> Repo.query()
|
||||
end
|
||||
|
||||
prune_hashtags_query = """
|
||||
DELETE FROM hashtags AS ht
|
||||
WHERE NOT EXISTS (
|
||||
|
|
|
@ -353,6 +353,134 @@ test "with the --keep-threads option it keeps old threads with bookmarked posts"
|
|||
|
||||
assert length(Repo.all(Object)) == 1
|
||||
end
|
||||
|
||||
test "We don't have unexpected tables which can contain objects that are referenced by activities" do
|
||||
# We can delete orphaned activities. For that we look for the objects they reference in the 'objects', 'activities', and 'users' table.
|
||||
# If someone adds another table with objects (idk, maybe with separate relations, or collections or w/e), then we need to make sure we
|
||||
# add logic for that in the 'prune_objects' task so that we don't wrongly delete their corresponding activities.
|
||||
# So when someone adds (or removes) a table, this test will fail.
|
||||
# Either the table contains objects which can be referenced from the activities table
|
||||
# => in that case the prune_objects job should be adapted so we don't delete activities who still have the referenced object.
|
||||
# Or it doesn't contain objects which can be referenced from the activities table
|
||||
# => in that case you can add/remove the table to/from this (sorted) list.
|
||||
|
||||
assert Repo.query!(
|
||||
"SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';"
|
||||
).rows
|
||||
|> Enum.sort() == [
|
||||
["activities"],
|
||||
["announcement_read_relationships"],
|
||||
["announcements"],
|
||||
["apps"],
|
||||
["backups"],
|
||||
["bookmarks"],
|
||||
["chat_message_references"],
|
||||
["chats"],
|
||||
["config"],
|
||||
["conversation_participation_recipient_ships"],
|
||||
["conversation_participations"],
|
||||
["conversations"],
|
||||
["counter_cache"],
|
||||
["data_migration_failed_ids"],
|
||||
["data_migrations"],
|
||||
["deliveries"],
|
||||
["filters"],
|
||||
["following_relationships"],
|
||||
["hashtags"],
|
||||
["hashtags_objects"],
|
||||
["instances"],
|
||||
["lists"],
|
||||
["markers"],
|
||||
["mfa_tokens"],
|
||||
["moderation_log"],
|
||||
["notifications"],
|
||||
["oauth_authorizations"],
|
||||
["oauth_tokens"],
|
||||
["oban_jobs"],
|
||||
["oban_peers"],
|
||||
["objects"],
|
||||
["password_reset_tokens"],
|
||||
["push_subscriptions"],
|
||||
["registrations"],
|
||||
["report_notes"],
|
||||
["scheduled_activities"],
|
||||
["schema_migrations"],
|
||||
["thread_mutes"],
|
||||
["user_follows_hashtag"],
|
||||
["user_frontend_setting_profiles"],
|
||||
["user_invite_tokens"],
|
||||
["user_notes"],
|
||||
["user_relationships"],
|
||||
["users"]
|
||||
]
|
||||
end
|
||||
|
||||
test "it prunes orphaned activities with the --prune-orphaned-activities" do
|
||||
# Add a remote activity which references an Object
|
||||
%Object{} |> Map.merge(%{data: %{"id" => "object_for_activity"}}) |> Repo.insert()
|
||||
|
||||
%Activity{}
|
||||
|> Map.merge(%{
|
||||
local: false,
|
||||
data: %{"id" => "remote_activity_with_object", "object" => "object_for_activity"}
|
||||
})
|
||||
|> Repo.insert()
|
||||
|
||||
# Add a remote activity which references an activity
|
||||
%Activity{}
|
||||
|> Map.merge(%{
|
||||
local: false,
|
||||
data: %{
|
||||
"id" => "remote_activity_with_activity",
|
||||
"object" => "remote_activity_with_object"
|
||||
}
|
||||
})
|
||||
|> Repo.insert()
|
||||
|
||||
# Add a remote activity which references an Actor
|
||||
%User{} |> Map.merge(%{ap_id: "actor"}) |> Repo.insert()
|
||||
|
||||
%Activity{}
|
||||
|> Map.merge(%{
|
||||
local: false,
|
||||
data: %{"id" => "remote_activity_with_actor", "object" => "actor"}
|
||||
})
|
||||
|> Repo.insert()
|
||||
|
||||
# Add a remote activity without existing referenced object, activity or actor
|
||||
%Activity{}
|
||||
|> Map.merge(%{
|
||||
local: false,
|
||||
data: %{
|
||||
"id" => "remote_activity_without_existing_referenced_object",
|
||||
"object" => "non_existing"
|
||||
}
|
||||
})
|
||||
|> Repo.insert()
|
||||
|
||||
# Add a local activity without existing referenced object, activity or actor
|
||||
%Activity{}
|
||||
|> Map.merge(%{
|
||||
local: true,
|
||||
data: %{"id" => "local_activity_with_actor", "object" => "non_existing"}
|
||||
})
|
||||
|> Repo.insert()
|
||||
|
||||
# The remote activities without existing reference, and only the remote activities without existing reference, are deleted
|
||||
# if, and only if, we provide the --prune-orphaned-activities option
|
||||
assert length(Repo.all(Activity)) == 5
|
||||
Mix.Tasks.Pleroma.Database.run(["prune_objects"])
|
||||
assert length(Repo.all(Activity)) == 5
|
||||
Mix.Tasks.Pleroma.Database.run(["prune_objects", "--prune-orphaned-activities"])
|
||||
activities = Repo.all(Activity)
|
||||
|
||||
assert "remote_activity_without_existing_referenced_object" not in Enum.map(
|
||||
activities,
|
||||
fn a -> a.data["id"] end
|
||||
)
|
||||
|
||||
assert length(activities) == 4
|
||||
end
|
||||
end
|
||||
|
||||
describe "running update_users_following_followers_counts" do
|
||||
|
|
Loading…
Reference in a new issue