Prune Objects --keep-threads
This adds an option to the prune_objects mix task. The original way deleted all non-local public posts older than a certain time frame. Here we add a different query which you can call using the option --keep-threads. We query from the activities table all context id's where 1. the newest activity with this context is still old 2. none of the activities with this context is is local 3. none of the activities with this context is bookmarked and delete all objects with these contexts. The idea is that posts with local activities (posts, replies, likes, repeats...) may be intersesting to keep. Besides that, a post lives in a certain context (the thread), so we keep the whole thread as well. Caveats: * Quotes have a different context. Therefore, when someone quotes a post, it's possible the quoted post will still be deleted. * Although undocumented (in docs/docs/administration/CLI_tasks/database.md/#prune-old-remote-posts-from-the-database), the 'normal' delete action still keeps old remote non-public posts. With this option we don't care about scope. * I ran this on my instance, but directly on the DB. I still need to test to be sure that we don't get a time-out error or something. Some statistics from explain analyse: (cost=1402845.92..1933782.00 rows=3810907 width=62) (actual time=2562455.486..2562455.495 rows=0 loops=1) Planning Time: 505.327 ms Trigger for constraint chat_message_references_object_id_fkey: time=651939.797 calls=921740 Trigger for constraint deliveries_object_id_fkey: time=52036.009 calls=921740 Trigger for constraint hashtags_objects_object_id_fkey: time=20665.778 calls=921740 Execution Time: 3287933.902 ms
This commit is contained in:
parent
336d06b2a8
commit
eb503f093c
2 changed files with 212 additions and 18 deletions
|
@ -67,7 +67,8 @@ def run(["prune_objects" | args]) do
|
||||||
OptionParser.parse(
|
OptionParser.parse(
|
||||||
args,
|
args,
|
||||||
strict: [
|
strict: [
|
||||||
vacuum: :boolean
|
vacuum: :boolean,
|
||||||
|
keep_threads: :boolean
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -77,24 +78,47 @@ def run(["prune_objects" | args]) do
|
||||||
|
|
||||||
Logger.info("Pruning objects older than #{deadline} days")
|
Logger.info("Pruning objects older than #{deadline} days")
|
||||||
|
|
||||||
time_deadline =
|
if Keyword.get(options, :keep_threads) do
|
||||||
NaiveDateTime.utc_now()
|
# We delete objects from threads where
|
||||||
|> NaiveDateTime.add(-(deadline * 86_400))
|
# 1. the newest post is still old
|
||||||
|
# 2. none of the activities is local
|
||||||
|
# 3. none of the activities is bookmarked
|
||||||
|
delete_keep_threads_statement = """
|
||||||
|
delete
|
||||||
|
from public.objects o
|
||||||
|
where o.data ->> 'context' in (
|
||||||
|
select
|
||||||
|
a.data ->> 'context'
|
||||||
|
from public.activities a
|
||||||
|
left join public.bookmarks b on a.id = b.activity_id
|
||||||
|
group by (a.data ->> 'context'::text)
|
||||||
|
having max(a.updated_at) < now() - interval '#{deadline} day'
|
||||||
|
and not bool_or(a.local)
|
||||||
|
and max(b.id) is null
|
||||||
|
);
|
||||||
|
"""
|
||||||
|
|
||||||
from(o in Object,
|
Repo.query(delete_keep_threads_statement)
|
||||||
where:
|
else
|
||||||
fragment(
|
time_deadline =
|
||||||
"?->'to' \\? ? OR ?->'cc' \\? ?",
|
NaiveDateTime.utc_now()
|
||||||
o.data,
|
|> NaiveDateTime.add(-(deadline * 86_400))
|
||||||
^Pleroma.Constants.as_public(),
|
|
||||||
o.data,
|
from(o in Object,
|
||||||
^Pleroma.Constants.as_public()
|
where:
|
||||||
),
|
fragment(
|
||||||
where: o.inserted_at < ^time_deadline,
|
"?->'to' \\? ? OR ?->'cc' \\? ?",
|
||||||
where:
|
o.data,
|
||||||
fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host())
|
^Pleroma.Constants.as_public(),
|
||||||
)
|
o.data,
|
||||||
|> Repo.delete_all(timeout: :infinity)
|
^Pleroma.Constants.as_public()
|
||||||
|
),
|
||||||
|
where: o.inserted_at < ^time_deadline,
|
||||||
|
where:
|
||||||
|
fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host())
|
||||||
|
)
|
||||||
|
|> Repo.delete_all(timeout: :infinity)
|
||||||
|
end
|
||||||
|
|
||||||
prune_hashtags_query = """
|
prune_hashtags_query = """
|
||||||
DELETE FROM hashtags AS ht
|
DELETE FROM hashtags AS ht
|
||||||
|
|
|
@ -68,6 +68,176 @@ test "it prunes old objects from the database" do
|
||||||
assert length(Repo.all(Object)) == 1
|
assert length(Repo.all(Object)) == 1
|
||||||
refute Object.get_by_id(id)
|
refute Object.get_by_id(id)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
test "with the --keep-threads option it still keeps non-old threads even with no local interactions" do
|
||||||
|
remote_user = insert(:user, local: false)
|
||||||
|
remote_user2 = insert(:user, local: false)
|
||||||
|
|
||||||
|
{:ok, remote_post_activity} =
|
||||||
|
CommonAPI.post(remote_user, %{status: "some thing", local: false})
|
||||||
|
|
||||||
|
{:ok, remote_post_reply_activity} =
|
||||||
|
CommonAPI.post(remote_user2, %{
|
||||||
|
status: "some reply",
|
||||||
|
in_reply_to_status_id: remote_post_activity.id
|
||||||
|
})
|
||||||
|
|
||||||
|
remote_post_activity
|
||||||
|
|> Ecto.Changeset.change(%{local: false})
|
||||||
|
|> Repo.update!()
|
||||||
|
|
||||||
|
remote_post_reply_activity
|
||||||
|
|> Ecto.Changeset.change(%{local: false})
|
||||||
|
|> Repo.update!()
|
||||||
|
|
||||||
|
assert length(Repo.all(Object)) == 2
|
||||||
|
|
||||||
|
Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"])
|
||||||
|
|
||||||
|
assert length(Repo.all(Object)) == 2
|
||||||
|
end
|
||||||
|
|
||||||
|
test "with the --keep-threads option it deletes old threads with no local interaction" do
|
||||||
|
deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) + 1
|
||||||
|
|
||||||
|
old_insert_date =
|
||||||
|
Timex.now()
|
||||||
|
|> Timex.shift(days: -deadline)
|
||||||
|
|> Timex.to_naive_datetime()
|
||||||
|
|> NaiveDateTime.truncate(:second)
|
||||||
|
|
||||||
|
remote_user = insert(:user, local: false)
|
||||||
|
remote_user2 = insert(:user, local: false)
|
||||||
|
|
||||||
|
{:ok, old_remote_post_activity} =
|
||||||
|
CommonAPI.post(remote_user, %{status: "some thing", local: false})
|
||||||
|
|
||||||
|
old_remote_post_activity
|
||||||
|
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|
||||||
|
|> Repo.update!()
|
||||||
|
|
||||||
|
{:ok, old_remote_post_reply_activity} =
|
||||||
|
CommonAPI.post(remote_user2, %{
|
||||||
|
status: "some reply",
|
||||||
|
in_reply_to_status_id: old_remote_post_activity.id
|
||||||
|
})
|
||||||
|
|
||||||
|
old_remote_post_reply_activity
|
||||||
|
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|
||||||
|
|> Repo.update!()
|
||||||
|
|
||||||
|
{:ok, old_favourite_activity} =
|
||||||
|
CommonAPI.favorite(remote_user2, old_remote_post_activity.id)
|
||||||
|
|
||||||
|
old_favourite_activity
|
||||||
|
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|
||||||
|
|> Repo.update!()
|
||||||
|
|
||||||
|
{:ok, old_repeat_activity} = CommonAPI.repeat(old_remote_post_activity.id, remote_user2)
|
||||||
|
|
||||||
|
old_repeat_activity
|
||||||
|
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|
||||||
|
|> Repo.update!()
|
||||||
|
|
||||||
|
assert length(Repo.all(Object)) == 2
|
||||||
|
|
||||||
|
Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"])
|
||||||
|
|
||||||
|
assert Repo.all(Object) == []
|
||||||
|
end
|
||||||
|
|
||||||
|
test "with the --keep-threads option it keeps old threads with local interaction" do
|
||||||
|
deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) + 1
|
||||||
|
|
||||||
|
old_insert_date =
|
||||||
|
Timex.now()
|
||||||
|
|> Timex.shift(days: -deadline)
|
||||||
|
|> Timex.to_naive_datetime()
|
||||||
|
|> NaiveDateTime.truncate(:second)
|
||||||
|
|
||||||
|
remote_user = insert(:user, local: false)
|
||||||
|
local_user = insert(:user, local: true)
|
||||||
|
|
||||||
|
# local reply
|
||||||
|
{:ok, old_remote_post1_activity} =
|
||||||
|
CommonAPI.post(remote_user, %{status: "some thing", local: false})
|
||||||
|
|
||||||
|
old_remote_post1_activity
|
||||||
|
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|
||||||
|
|> Repo.update!()
|
||||||
|
|
||||||
|
{:ok, old_local_post2_reply_activity} =
|
||||||
|
CommonAPI.post(local_user, %{
|
||||||
|
status: "some reply",
|
||||||
|
in_reply_to_status_id: old_remote_post1_activity.id
|
||||||
|
})
|
||||||
|
|
||||||
|
old_local_post2_reply_activity
|
||||||
|
|> Ecto.Changeset.change(%{local: true, updated_at: old_insert_date})
|
||||||
|
|> Repo.update!()
|
||||||
|
|
||||||
|
# local Like
|
||||||
|
{:ok, old_remote_post3_activity} =
|
||||||
|
CommonAPI.post(remote_user, %{status: "some thing", local: false})
|
||||||
|
|
||||||
|
old_remote_post3_activity
|
||||||
|
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|
||||||
|
|> Repo.update!()
|
||||||
|
|
||||||
|
{:ok, old_favourite_activity} = CommonAPI.favorite(local_user, old_remote_post3_activity.id)
|
||||||
|
|
||||||
|
old_favourite_activity
|
||||||
|
|> Ecto.Changeset.change(%{local: true, updated_at: old_insert_date})
|
||||||
|
|> Repo.update!()
|
||||||
|
|
||||||
|
# local Announce
|
||||||
|
{:ok, old_remote_post4_activity} =
|
||||||
|
CommonAPI.post(remote_user, %{status: "some thing", local: false})
|
||||||
|
|
||||||
|
old_remote_post4_activity
|
||||||
|
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|
||||||
|
|> Repo.update!()
|
||||||
|
|
||||||
|
{:ok, old_repeat_activity} = CommonAPI.repeat(old_remote_post4_activity.id, local_user)
|
||||||
|
|
||||||
|
old_repeat_activity
|
||||||
|
|> Ecto.Changeset.change(%{local: true, updated_at: old_insert_date})
|
||||||
|
|> Repo.update!()
|
||||||
|
|
||||||
|
assert length(Repo.all(Object)) == 4
|
||||||
|
|
||||||
|
Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"])
|
||||||
|
|
||||||
|
assert length(Repo.all(Object)) == 4
|
||||||
|
end
|
||||||
|
|
||||||
|
test "with the --keep-threads option it keeps old threads with bookmarked posts" do
|
||||||
|
deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) + 1
|
||||||
|
|
||||||
|
old_insert_date =
|
||||||
|
Timex.now()
|
||||||
|
|> Timex.shift(days: -deadline)
|
||||||
|
|> Timex.to_naive_datetime()
|
||||||
|
|> NaiveDateTime.truncate(:second)
|
||||||
|
|
||||||
|
remote_user = insert(:user, local: false)
|
||||||
|
local_user = insert(:user, local: true)
|
||||||
|
|
||||||
|
{:ok, old_remote_post_activity} =
|
||||||
|
CommonAPI.post(remote_user, %{status: "some thing", local: false})
|
||||||
|
|
||||||
|
old_remote_post_activity
|
||||||
|
|> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
|
||||||
|
|> Repo.update!()
|
||||||
|
|
||||||
|
Pleroma.Bookmark.create(local_user.id, old_remote_post_activity.id)
|
||||||
|
|
||||||
|
assert length(Repo.all(Object)) == 1
|
||||||
|
|
||||||
|
Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"])
|
||||||
|
|
||||||
|
assert length(Repo.all(Object)) == 1
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
describe "running update_users_following_followers_counts" do
|
describe "running update_users_following_followers_counts" do
|
||||||
|
|
Loading…
Reference in a new issue