Prune Objects --keep-threads option #350

Merged
floatingghost merged 4 commits from ilja/akkoma:prune_objects_whithout_breaking_threads into develop 2023-01-09 22:15:42 +00:00
2 changed files with 212 additions and 18 deletions
Showing only changes of commit eb503f093c - Show all commits

View File

@ -67,7 +67,8 @@ defmodule Mix.Tasks.Pleroma.Database do
OptionParser.parse(
args,
strict: [
vacuum: :boolean
vacuum: :boolean,
keep_threads: :boolean
]
)
@ -77,24 +78,47 @@ defmodule Mix.Tasks.Pleroma.Database do
Logger.info("Pruning objects older than #{deadline} days")
time_deadline =
NaiveDateTime.utc_now()
|> NaiveDateTime.add(-(deadline * 86_400))
if Keyword.get(options, :keep_threads) do
# We delete objects from threads where
# 1. the newest post is still old
# 2. none of the activities is local
# 3. none of the activities is bookmarked
delete_keep_threads_statement = """
delete
from public.objects o
where o.data ->> 'context' in (
select
a.data ->> 'context'
from public.activities a
left join public.bookmarks b on a.id = b.activity_id
group by (a.data ->> 'context'::text)
having max(a.updated_at) < now() - interval '#{deadline} day'
and not bool_or(a.local)
and max(b.id) is null
);
"""
from(o in Object,
where:
fragment(
"?->'to' \\? ? OR ?->'cc' \\? ?",
o.data,
^Pleroma.Constants.as_public(),
o.data,
^Pleroma.Constants.as_public()
),
where: o.inserted_at < ^time_deadline,
where:
fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host())
)
|> Repo.delete_all(timeout: :infinity)
Repo.query(delete_keep_threads_statement)
else
time_deadline =
NaiveDateTime.utc_now()
|> NaiveDateTime.add(-(deadline * 86_400))
from(o in Object,
where:
fragment(
"?->'to' \\? ? OR ?->'cc' \\? ?",
o.data,
^Pleroma.Constants.as_public(),
o.data,
^Pleroma.Constants.as_public()
),
where: o.inserted_at < ^time_deadline,
where:
fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host())
)
|> Repo.delete_all(timeout: :infinity)
end
prune_hashtags_query = """
DELETE FROM hashtags AS ht

View File

@ -68,6 +68,176 @@ defmodule Mix.Tasks.Pleroma.DatabaseTest do
assert length(Repo.all(Object)) == 1
refute Object.get_by_id(id)
end
test "with the --keep-threads option it still keeps non-old threads even with no local interactions" do
remote_user = insert(:user, local: false)
remote_user2 = insert(:user, local: false)
{:ok, remote_post_activity} =
CommonAPI.post(remote_user, %{status: "some thing", local: false})
{:ok, remote_post_reply_activity} =
CommonAPI.post(remote_user2, %{
status: "some reply",
in_reply_to_status_id: remote_post_activity.id
})
remote_post_activity
|> Ecto.Changeset.change(%{local: false})
|> Repo.update!()
remote_post_reply_activity
|> Ecto.Changeset.change(%{local: false})
|> Repo.update!()
assert length(Repo.all(Object)) == 2
Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"])
assert length(Repo.all(Object)) == 2
end
test "with the --keep-threads option it deletes old threads with no local interaction" do
deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) + 1
old_insert_date =
Timex.now()
|> Timex.shift(days: -deadline)
|> Timex.to_naive_datetime()
|> NaiveDateTime.truncate(:second)
remote_user = insert(:user, local: false)
remote_user2 = insert(:user, local: false)
{:ok, old_remote_post_activity} =
CommonAPI.post(remote_user, %{status: "some thing", local: false})
old_remote_post_activity
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|> Repo.update!()
{:ok, old_remote_post_reply_activity} =
CommonAPI.post(remote_user2, %{
status: "some reply",
in_reply_to_status_id: old_remote_post_activity.id
})
old_remote_post_reply_activity
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|> Repo.update!()
{:ok, old_favourite_activity} =
CommonAPI.favorite(remote_user2, old_remote_post_activity.id)
old_favourite_activity
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|> Repo.update!()
{:ok, old_repeat_activity} = CommonAPI.repeat(old_remote_post_activity.id, remote_user2)
old_repeat_activity
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|> Repo.update!()
assert length(Repo.all(Object)) == 2
Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"])
assert Repo.all(Object) == []
end
test "with the --keep-threads option it keeps old threads with local interaction" do
deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) + 1
old_insert_date =
Timex.now()
|> Timex.shift(days: -deadline)
|> Timex.to_naive_datetime()
|> NaiveDateTime.truncate(:second)
remote_user = insert(:user, local: false)
local_user = insert(:user, local: true)
# local reply
{:ok, old_remote_post1_activity} =
CommonAPI.post(remote_user, %{status: "some thing", local: false})
old_remote_post1_activity
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|> Repo.update!()
{:ok, old_local_post2_reply_activity} =
CommonAPI.post(local_user, %{
status: "some reply",
in_reply_to_status_id: old_remote_post1_activity.id
})
old_local_post2_reply_activity
|> Ecto.Changeset.change(%{local: true, updated_at: old_insert_date})
|> Repo.update!()
# local Like
{:ok, old_remote_post3_activity} =
CommonAPI.post(remote_user, %{status: "some thing", local: false})
old_remote_post3_activity
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|> Repo.update!()
{:ok, old_favourite_activity} = CommonAPI.favorite(local_user, old_remote_post3_activity.id)
old_favourite_activity
|> Ecto.Changeset.change(%{local: true, updated_at: old_insert_date})
|> Repo.update!()
# local Announce
{:ok, old_remote_post4_activity} =
CommonAPI.post(remote_user, %{status: "some thing", local: false})
old_remote_post4_activity
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|> Repo.update!()
{:ok, old_repeat_activity} = CommonAPI.repeat(old_remote_post4_activity.id, local_user)
old_repeat_activity
|> Ecto.Changeset.change(%{local: true, updated_at: old_insert_date})
|> Repo.update!()
assert length(Repo.all(Object)) == 4
Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"])
assert length(Repo.all(Object)) == 4
end
test "with the --keep-threads option it keeps old threads with bookmarked posts" do
deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) + 1
old_insert_date =
Timex.now()
|> Timex.shift(days: -deadline)
|> Timex.to_naive_datetime()
|> NaiveDateTime.truncate(:second)
remote_user = insert(:user, local: false)
local_user = insert(:user, local: true)
{:ok, old_remote_post_activity} =
CommonAPI.post(remote_user, %{status: "some thing", local: false})
old_remote_post_activity
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|> Repo.update!()
Pleroma.Bookmark.create(local_user.id, old_remote_post_activity.id)
assert length(Repo.all(Object)) == 1
Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"])
assert length(Repo.all(Object)) == 1
end
end
describe "running update_users_following_followers_counts" do