Build prune_objects --keep-threads query with Ecto

The query is now built with Ecto instead of a raw SQL string.
I also ran it on a local DB.
It went from 4000834 records to 1734648 in about an hour without a timeout.
This commit is contained in:
ilja 2022-12-10 19:29:04 +01:00
parent eb503f093c
commit 04cc1d41ce

View file

@ -75,34 +75,30 @@ def run(["prune_objects" | args]) do
start_pleroma() start_pleroma()
deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) deadline = Pleroma.Config.get([:instance, :remote_post_retention_days])
time_deadline = NaiveDateTime.utc_now() |> NaiveDateTime.add(-(deadline * 86_400))
Logger.info("Pruning objects older than #{deadline} days")
if Keyword.get(options, :keep_threads) do if Keyword.get(options, :keep_threads) do
# We delete objects from threads where Logger.info(
"Pruning objects older than #{deadline} days without local interaction, keeping threads intact"
)
# We want to delete objects from threads where
# 1. the newest post is still old # 1. the newest post is still old
# 2. none of the activities is local # 2. none of the activities is local
# 3. none of the activities is bookmarked # 3. none of the activities is bookmarked
delete_keep_threads_statement = """ deletable_context =
delete Pleroma.Activity
from public.objects o |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id)
where o.data ->> 'context' in ( |> group_by([a], fragment("? ->> 'context'::text", a.data))
select |> having([a], max(a.updated_at) < ^time_deadline)
a.data ->> 'context' |> having([a], not fragment("bool_or(?)", a.local))
from public.activities a |> having([a, b], fragment("max(?::text) is null", b.id))
left join public.bookmarks b on a.id = b.activity_id |> select([a], fragment("? ->> 'context'::text", a.data))
group by (a.data ->> 'context'::text)
having max(a.updated_at) < now() - interval '#{deadline} day'
and not bool_or(a.local)
and max(b.id) is null
);
"""
Repo.query(delete_keep_threads_statement) Pleroma.Object
|> where([o], fragment("? ->> 'context'::text", o.data) in subquery(deletable_context))
else else
time_deadline = Logger.info("Pruning objects older than #{deadline} days")
NaiveDateTime.utc_now()
|> NaiveDateTime.add(-(deadline * 86_400))
from(o in Object, from(o in Object,
where: where:
@ -117,8 +113,8 @@ def run(["prune_objects" | args]) do
where: where:
fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host()) fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host())
) )
|> Repo.delete_all(timeout: :infinity)
end end
|> Repo.delete_all(timeout: :infinity)
prune_hashtags_query = """ prune_hashtags_query = """
DELETE FROM hashtags AS ht DELETE FROM hashtags AS ht