forked from AkkomaGang/akkoma
prune_objects can prune orphaned activities that reference an array of objects
E.g. Flag activities have an array of objects. We prune the activity when NONE of the objects can be found. Note that the cost of finding and deleting these is ~4x higher than finding and deleting the non-array ones. Only string: Delete on activities (cost=506573.48..506580.38 rows=0 width=0). Only array: Delete on activities (cost=3570359.68..4276365.34 rows=0 width=0). (They are still executed separately, so the total cost is the sum of the two.)
This commit is contained in:
parent
a7ec6e039c
commit
57eef6d764
2 changed files with 94 additions and 18 deletions
|
@ -172,35 +172,48 @@ def run(["prune_objects" | args]) do
|
||||||
|> Repo.delete_all(timeout: :infinity)
|
|> Repo.delete_all(timeout: :infinity)
|
||||||
|
|
||||||
if Keyword.get(options, :prune_orphaned_activities) do
|
if Keyword.get(options, :prune_orphaned_activities) do
|
||||||
|
# Prune activities who link to a single object
|
||||||
"""
|
"""
|
||||||
delete from public.activities
|
delete from public.activities
|
||||||
where id in (
|
where id in (
|
||||||
select a.id from public.activities a
|
select a.id from public.activities a
|
||||||
left join public.objects o on a.data ->> 'object' = o.data ->> 'id'
|
left join public.objects o on a.data ->> 'object' = o.data ->> 'id'
|
||||||
left join public.activities a2 on a.data ->> 'object' = a2.data ->> 'id'
|
left join public.activities a2 on a.data ->> 'object' = a2.data ->> 'id'
|
||||||
left join public.users u on a.data ->> 'object' = u.ap_id
|
left join public.users u on a.data ->> 'object' = u.ap_id
|
||||||
-- Only clean up remote activities
|
where not a.local
|
||||||
where not a.local
|
and jsonb_typeof(a."data" -> 'object') = 'string'
|
||||||
-- For now we only focus on activities with direct links to objects
|
and o.id is null
|
||||||
-- e.g. not json objects (in case of embedded objects) or json arrays (in case of multiple objects)
|
and a2.id is null
|
||||||
and jsonb_typeof(a."data" -> 'object') = 'string'
|
and u.id is null
|
||||||
-- Find Activities that don't have existing objects
|
|
||||||
and o.id is null
|
|
||||||
and a2.id is null
|
|
||||||
and u.id is null
|
|
||||||
)
|
)
|
||||||
"""
|
"""
|
||||||
|> Repo.query()
|
|> Repo.query([], timeout: :infinity)
|
||||||
|
|
||||||
|
# Prune activities who link to an array of objects
|
||||||
|
"""
|
||||||
|
delete from public.activities
|
||||||
|
where id in (
|
||||||
|
select a.id from public.activities a
|
||||||
|
join json_array_elements_text((a."data" -> 'object')::json) as j on jsonb_typeof(a."data" -> 'object') = 'array'
|
||||||
|
left join public.objects o on j.value = o.data ->> 'id'
|
||||||
|
left join public.activities a2 on j.value = a2.data ->> 'id'
|
||||||
|
left join public.users u on j.value = u.ap_id
|
||||||
|
group by a.id
|
||||||
|
having max(o.data ->> 'id') is null
|
||||||
|
and max(a2.data ->> 'id') is null
|
||||||
|
and max(u.ap_id) is null
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
|> Repo.query([], timeout: :infinity)
|
||||||
end
|
end
|
||||||
|
|
||||||
prune_hashtags_query = """
|
"""
|
||||||
DELETE FROM hashtags AS ht
|
DELETE FROM hashtags AS ht
|
||||||
WHERE NOT EXISTS (
|
WHERE NOT EXISTS (
|
||||||
SELECT 1 FROM hashtags_objects hto
|
SELECT 1 FROM hashtags_objects hto
|
||||||
WHERE ht.id = hto.hashtag_id)
|
WHERE ht.id = hto.hashtag_id)
|
||||||
"""
|
"""
|
||||||
|
|> Repo.query()
|
||||||
Repo.query(prune_hashtags_query)
|
|
||||||
|
|
||||||
if Keyword.get(options, :vacuum) do
|
if Keyword.get(options, :vacuum) do
|
||||||
Maintenance.vacuum("full")
|
Maintenance.vacuum("full")
|
||||||
|
|
|
@ -354,7 +354,7 @@ test "with the --keep-threads option it keeps old threads with bookmarked posts"
|
||||||
assert length(Repo.all(Object)) == 1
|
assert length(Repo.all(Object)) == 1
|
||||||
end
|
end
|
||||||
|
|
||||||
test "We don't have unexpected tables which can contain objects that are referenced by activities" do
|
test "We don't have unexpected tables which may contain objects that are referenced by activities" do
|
||||||
# We can delete orphaned activities. For that we look for the objects they reference in the 'objects', 'activities', and 'users' table.
|
# We can delete orphaned activities. For that we look for the objects they reference in the 'objects', 'activities', and 'users' table.
|
||||||
# If someone adds another table with objects (idk, maybe with separate relations, or collections or w/e), then we need to make sure we
|
# If someone adds another table with objects (idk, maybe with separate relations, or collections or w/e), then we need to make sure we
|
||||||
# add logic for that in the 'prune_objects' task so that we don't wrongly delete their corresponding activities.
|
# add logic for that in the 'prune_objects' task so that we don't wrongly delete their corresponding activities.
|
||||||
|
@ -481,6 +481,69 @@ test "it prunes orphaned activities with the --prune-orphaned-activities" do
|
||||||
|
|
||||||
assert length(activities) == 4
|
assert length(activities) == 4
|
||||||
end
|
end
|
||||||
|
|
||||||
|
test "it prunes orphaned activities with the --prune-orphaned-activities when the objects are referenced from an array" do
|
||||||
|
%Object{} |> Map.merge(%{data: %{"id" => "existing_object"}}) |> Repo.insert()
|
||||||
|
%User{} |> Map.merge(%{ap_id: "existing_actor"}) |> Repo.insert()
|
||||||
|
|
||||||
|
# Multiple objects, one object exists (keep)
|
||||||
|
%Activity{}
|
||||||
|
|> Map.merge(%{
|
||||||
|
local: false,
|
||||||
|
data: %{
|
||||||
|
"id" => "remote_activity_existing_object",
|
||||||
|
"object" => ["non_ existing_object", "existing_object"]
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|> Repo.insert()
|
||||||
|
|
||||||
|
# Multiple objects, one actor exists (keep)
|
||||||
|
%Activity{}
|
||||||
|
|> Map.merge(%{
|
||||||
|
local: false,
|
||||||
|
data: %{
|
||||||
|
"id" => "remote_activity_existing_actor",
|
||||||
|
"object" => ["non_ existing_object", "existing_actor"]
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|> Repo.insert()
|
||||||
|
|
||||||
|
# Multiple objects, one activity exists (keep)
|
||||||
|
%Activity{}
|
||||||
|
|> Map.merge(%{
|
||||||
|
local: false,
|
||||||
|
data: %{
|
||||||
|
"id" => "remote_activity_existing_activity",
|
||||||
|
"object" => ["non_ existing_object", "remote_activity_existing_actor"]
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|> Repo.insert()
|
||||||
|
|
||||||
|
# Multiple objects none exist (prune)
|
||||||
|
%Activity{}
|
||||||
|
|> Map.merge(%{
|
||||||
|
local: false,
|
||||||
|
data: %{
|
||||||
|
"id" => "remote_activity_without_existing_referenced_object",
|
||||||
|
"object" => ["owo", "whats_this"]
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|> Repo.insert()
|
||||||
|
|
||||||
|
assert length(Repo.all(Activity)) == 4
|
||||||
|
Mix.Tasks.Pleroma.Database.run(["prune_objects"])
|
||||||
|
assert length(Repo.all(Activity)) == 4
|
||||||
|
Mix.Tasks.Pleroma.Database.run(["prune_objects", "--prune-orphaned-activities"])
|
||||||
|
activities = Repo.all(Activity)
|
||||||
|
assert length(activities) == 3
|
||||||
|
|
||||||
|
assert "remote_activity_without_existing_referenced_object" not in Enum.map(
|
||||||
|
activities,
|
||||||
|
fn a -> a.data["id"] end
|
||||||
|
)
|
||||||
|
|
||||||
|
assert length(activities) == 3
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
describe "running update_users_following_followers_counts" do
|
describe "running update_users_following_followers_counts" do
|
||||||
|
|
Loading…
Reference in a new issue