Add limit CLI flags to prune jobs #655

Merged
floatingghost merged 11 commits from Oneric/akkoma:prune-batch into develop 2024-06-17 20:47:53 +00:00
Showing only changes of commit 24bab63cd8 - Show all commits

View file

@ -68,6 +68,8 @@ def prune_orphaned_activities(limit \\ 0) when is_number(limit) do
""" """
|> Repo.query([], timeout: :infinity) |> Repo.query([], timeout: :infinity)
Logger.info("Prune activity singles: deleted #{del_single} rows...")
# Prune activities who link to an array of objects # Prune activities who link to an array of objects
{:ok, %{:num_rows => del_array}} = {:ok, %{:num_rows => del_array}} =
""" """
@ -88,6 +90,8 @@ def prune_orphaned_activities(limit \\ 0) when is_number(limit) do
""" """
|> Repo.query([], timeout: :infinity) |> Repo.query([], timeout: :infinity)
Logger.info("Prune activity arrays: deleted #{del_array} rows...")
del_single + del_array del_single + del_array
end end
@ -222,102 +226,115 @@ def run(["prune_objects" | args]) do
Logger.info(log_message) Logger.info(log_message)
if Keyword.get(options, :keep_threads) do {del_obj, _} =
# We want to delete objects from threads where if Keyword.get(options, :keep_threads) do
# 1. the newest post is still old # We want to delete objects from threads where
# 2. none of the activities is local # 1. the newest post is still old
# 3. none of the activities is bookmarked # 2. none of the activities is local
# 4. optionally none of the posts is non-public # 3. none of the activities is bookmarked
deletable_context = # 4. optionally none of the posts is non-public
if Keyword.get(options, :keep_non_public) do deletable_context =
Pleroma.Activity if Keyword.get(options, :keep_non_public) do
|> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id) Pleroma.Activity
|> group_by([a], fragment("? ->> 'context'::text", a.data)) |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id)
|> having( |> group_by([a], fragment("? ->> 'context'::text", a.data))
[a], |> having(
not fragment( [a],
# Posts (checked on Create Activity) is non-public not fragment(
"bool_or((not(?->'to' \\? ? OR ?->'cc' \\? ?)) and ? ->> 'type' = 'Create')", # Posts (checked on Create Activity) is non-public
a.data, "bool_or((not(?->'to' \\? ? OR ?->'cc' \\? ?)) and ? ->> 'type' = 'Create')",
^Pleroma.Constants.as_public(), a.data,
a.data, ^Pleroma.Constants.as_public(),
^Pleroma.Constants.as_public(), a.data,
a.data ^Pleroma.Constants.as_public(),
a.data
)
) )
) else
else Pleroma.Activity
Pleroma.Activity |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id)
|> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id) |> group_by([a], fragment("? ->> 'context'::text", a.data))
|> group_by([a], fragment("? ->> 'context'::text", a.data)) end
end |> having([a], max(a.updated_at) < ^time_deadline)
|> having([a], max(a.updated_at) < ^time_deadline) |> having([a], not fragment("bool_or(?)", a.local))
|> having([a], not fragment("bool_or(?)", a.local)) |> having([_, b], fragment("max(?::text) is null", b.id))
|> having([_, b], fragment("max(?::text) is null", b.id)) |> maybe_limit(limit_cnt)
|> maybe_limit(limit_cnt) |> select([a], fragment("? ->> 'context'::text", a.data))
|> select([a], fragment("? ->> 'context'::text", a.data))
Pleroma.Object Pleroma.Object
|> where([o], fragment("? ->> 'context'::text", o.data) in subquery(deletable_context)) |> where([o], fragment("? ->> 'context'::text", o.data) in subquery(deletable_context))
else else
deletable = deletable =
if Keyword.get(options, :keep_non_public) do if Keyword.get(options, :keep_non_public) do
Pleroma.Object Pleroma.Object
|> where(
[o],
fragment(
"?->'to' \\? ? OR ?->'cc' \\? ?",
o.data,
^Pleroma.Constants.as_public(),
o.data,
^Pleroma.Constants.as_public()
)
)
else
Pleroma.Object
end
|> where([o], o.updated_at < ^time_deadline)
|> where( |> where(
[o], [o],
fragment( fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host())
"?->'to' \\? ? OR ?->'cc' \\? ?",
o.data,
^Pleroma.Constants.as_public(),
o.data,
^Pleroma.Constants.as_public()
)
) )
else |> maybe_limit(limit_cnt)
Pleroma.Object |> select([o], o.id)
end
|> where([o], o.updated_at < ^time_deadline)
|> where(
[o],
fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host())
)
|> maybe_limit(limit_cnt)
|> select([o], o.id)
Pleroma.Object Pleroma.Object
|> where([o], o.id in subquery(deletable)) |> where([o], o.id in subquery(deletable))
end end
|> Repo.delete_all(timeout: :infinity) |> Repo.delete_all(timeout: :infinity)
Logger.info("Deleted #{del_obj} objects...")
if !Keyword.get(options, :keep_threads) do if !Keyword.get(options, :keep_threads) do
# Without the --keep-threads option, it's possible that bookmarked # Without the --keep-threads option, it's possible that bookmarked
# objects have been deleted. We remove the corresponding bookmarks. # objects have been deleted. We remove the corresponding bookmarks.
""" {:ok, %{:num_rows => del_bookmarks}} =
delete from public.bookmarks """
where id in ( delete from public.bookmarks
select b.id from public.bookmarks b where id in (
left join public.activities a on b.activity_id = a.id select b.id from public.bookmarks b
left join public.objects o on a."data" ->> 'object' = o.data ->> 'id' left join public.activities a on b.activity_id = a.id
where o.id is null left join public.objects o on a."data" ->> 'object' = o.data ->> 'id'
) where o.id is null
""" )
|> Repo.query([], timeout: :infinity) """
|> Repo.query([], timeout: :infinity)
Logger.info("Deleted #{del_bookmarks} orphaned bookmarks...")
end end
if Keyword.get(options, :prune_orphaned_activities) do if Keyword.get(options, :prune_orphaned_activities) do
prune_orphaned_activities() del_activities = prune_orphaned_activities()
Logger.info("Deleted #{del_activities} orphaned activities...")
end end
""" {:ok, %{:num_rows => del_hashtags}} =
DELETE FROM hashtags AS ht """
WHERE NOT EXISTS ( DELETE FROM hashtags AS ht
SELECT 1 FROM hashtags_objects hto WHERE NOT EXISTS (
WHERE ht.id = hto.hashtag_id) SELECT 1 FROM hashtags_objects hto
""" WHERE ht.id = hto.hashtag_id)
|> Repo.query() """
|> Repo.query()
Logger.info("Deleted #{del_hashtags} no longer used hashtags...")
if Keyword.get(options, :vacuum) do if Keyword.get(options, :vacuum) do
Logger.info("Starting vacuum...")
Maintenance.vacuum("full") Maintenance.vacuum("full")
end end
Logger.info("All done!")
end end
def run(["prune_task"]) do def run(["prune_task"]) do