From eb503f093c4863063d36a0570c257e8d9ef4cc70 Mon Sep 17 00:00:00 2001 From: ilja Date: Wed, 7 Dec 2022 20:52:16 +0100 Subject: [PATCH 1/4] Prune Objects --keep-threads This adds an option to the prune_objects mix task. The original way deleted all non-local public posts older than a certain time frame. Here we add a different query which you can call using the option --keep-threads. We query from the activities table all context ids where 1. the newest activity with this context is still old 2. none of the activities with this context is local 3. none of the activities with this context is bookmarked and delete all objects with these contexts. The idea is that posts with local activities (posts, replies, likes, repeats...) may be interesting to keep. Besides that, a post lives in a certain context (the thread), so we keep the whole thread as well. Caveats: * Quotes have a different context. Therefore, when someone quotes a post, it's possible the quoted post will still be deleted. * Although undocumented (in docs/docs/administration/CLI_tasks/database.md/#prune-old-remote-posts-from-the-database), the 'normal' delete action still keeps old remote non-public posts. With this option we don't care about scope. * I ran this on my instance, but directly on the DB. I still need to test to be sure that we don't get a time-out error or something. 
Some statistics from explain analyse: (cost=1402845.92..1933782.00 rows=3810907 width=62) (actual time=2562455.486..2562455.495 rows=0 loops=1) Planning Time: 505.327 ms Trigger for constraint chat_message_references_object_id_fkey: time=651939.797 calls=921740 Trigger for constraint deliveries_object_id_fkey: time=52036.009 calls=921740 Trigger for constraint hashtags_objects_object_id_fkey: time=20665.778 calls=921740 Execution Time: 3287933.902 ms --- lib/mix/tasks/pleroma/database.ex | 60 +++++--- test/mix/tasks/pleroma/database_test.exs | 170 +++++++++++++++++++++++ 2 files changed, 212 insertions(+), 18 deletions(-) diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index 272c9e3e5..0e37a29f6 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -67,7 +67,8 @@ defmodule Mix.Tasks.Pleroma.Database do OptionParser.parse( args, strict: [ - vacuum: :boolean + vacuum: :boolean, + keep_threads: :boolean ] ) @@ -77,24 +78,47 @@ defmodule Mix.Tasks.Pleroma.Database do Logger.info("Pruning objects older than #{deadline} days") - time_deadline = - NaiveDateTime.utc_now() - |> NaiveDateTime.add(-(deadline * 86_400)) + if Keyword.get(options, :keep_threads) do + # We delete objects from threads where + # 1. the newest post is still old + # 2. none of the activities is local + # 3. none of the activities is bookmarked + delete_keep_threads_statement = """ + delete + from public.objects o + where o.data ->> 'context' in ( + select + a.data ->> 'context' + from public.activities a + left join public.bookmarks b on a.id = b.activity_id + group by (a.data ->> 'context'::text) + having max(a.updated_at) < now() - interval '#{deadline} day' + and not bool_or(a.local) + and max(b.id) is null + ); + """ - from(o in Object, - where: - fragment( - "?->'to' \\? ? OR ?->'cc' \\? 
?", - o.data, - ^Pleroma.Constants.as_public(), - o.data, - ^Pleroma.Constants.as_public() - ), - where: o.inserted_at < ^time_deadline, - where: - fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host()) - ) - |> Repo.delete_all(timeout: :infinity) + Repo.query(delete_keep_threads_statement) + else + time_deadline = + NaiveDateTime.utc_now() + |> NaiveDateTime.add(-(deadline * 86_400)) + + from(o in Object, + where: + fragment( + "?->'to' \\? ? OR ?->'cc' \\? ?", + o.data, + ^Pleroma.Constants.as_public(), + o.data, + ^Pleroma.Constants.as_public() + ), + where: o.inserted_at < ^time_deadline, + where: + fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host()) + ) + |> Repo.delete_all(timeout: :infinity) + end prune_hashtags_query = """ DELETE FROM hashtags AS ht diff --git a/test/mix/tasks/pleroma/database_test.exs b/test/mix/tasks/pleroma/database_test.exs index 7a1a759da..07de9b448 100644 --- a/test/mix/tasks/pleroma/database_test.exs +++ b/test/mix/tasks/pleroma/database_test.exs @@ -68,6 +68,176 @@ defmodule Mix.Tasks.Pleroma.DatabaseTest do assert length(Repo.all(Object)) == 1 refute Object.get_by_id(id) end + + test "with the --keep-threads option it still keeps non-old threads even with no local interactions" do + remote_user = insert(:user, local: false) + remote_user2 = insert(:user, local: false) + + {:ok, remote_post_activity} = + CommonAPI.post(remote_user, %{status: "some thing", local: false}) + + {:ok, remote_post_reply_activity} = + CommonAPI.post(remote_user2, %{ + status: "some reply", + in_reply_to_status_id: remote_post_activity.id + }) + + remote_post_activity + |> Ecto.Changeset.change(%{local: false}) + |> Repo.update!() + + remote_post_reply_activity + |> Ecto.Changeset.change(%{local: false}) + |> Repo.update!() + + assert length(Repo.all(Object)) == 2 + + Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"]) + + assert length(Repo.all(Object)) == 2 + end + + test 
"with the --keep-threads option it deletes old threads with no local interaction" do + deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) + 1 + + old_insert_date = + Timex.now() + |> Timex.shift(days: -deadline) + |> Timex.to_naive_datetime() + |> NaiveDateTime.truncate(:second) + + remote_user = insert(:user, local: false) + remote_user2 = insert(:user, local: false) + + {:ok, old_remote_post_activity} = + CommonAPI.post(remote_user, %{status: "some thing", local: false}) + + old_remote_post_activity + |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date}) + |> Repo.update!() + + {:ok, old_remote_post_reply_activity} = + CommonAPI.post(remote_user2, %{ + status: "some reply", + in_reply_to_status_id: old_remote_post_activity.id + }) + + old_remote_post_reply_activity + |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date}) + |> Repo.update!() + + {:ok, old_favourite_activity} = + CommonAPI.favorite(remote_user2, old_remote_post_activity.id) + + old_favourite_activity + |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date}) + |> Repo.update!() + + {:ok, old_repeat_activity} = CommonAPI.repeat(old_remote_post_activity.id, remote_user2) + + old_repeat_activity + |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date}) + |> Repo.update!() + + assert length(Repo.all(Object)) == 2 + + Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"]) + + assert Repo.all(Object) == [] + end + + test "with the --keep-threads option it keeps old threads with local interaction" do + deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) + 1 + + old_insert_date = + Timex.now() + |> Timex.shift(days: -deadline) + |> Timex.to_naive_datetime() + |> NaiveDateTime.truncate(:second) + + remote_user = insert(:user, local: false) + local_user = insert(:user, local: true) + + # local reply + {:ok, old_remote_post1_activity} = + CommonAPI.post(remote_user, %{status: "some thing", 
local: false}) + + old_remote_post1_activity + |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date}) + |> Repo.update!() + + {:ok, old_local_post2_reply_activity} = + CommonAPI.post(local_user, %{ + status: "some reply", + in_reply_to_status_id: old_remote_post1_activity.id + }) + + old_local_post2_reply_activity + |> Ecto.Changeset.change(%{local: true, updated_at: old_insert_date}) + |> Repo.update!() + + # local Like + {:ok, old_remote_post3_activity} = + CommonAPI.post(remote_user, %{status: "some thing", local: false}) + + old_remote_post3_activity + |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date}) + |> Repo.update!() + + {:ok, old_favourite_activity} = CommonAPI.favorite(local_user, old_remote_post3_activity.id) + + old_favourite_activity + |> Ecto.Changeset.change(%{local: true, updated_at: old_insert_date}) + |> Repo.update!() + + # local Announce + {:ok, old_remote_post4_activity} = + CommonAPI.post(remote_user, %{status: "some thing", local: false}) + + old_remote_post4_activity + |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date}) + |> Repo.update!() + + {:ok, old_repeat_activity} = CommonAPI.repeat(old_remote_post4_activity.id, local_user) + + old_repeat_activity + |> Ecto.Changeset.change(%{local: true, updated_at: old_insert_date}) + |> Repo.update!() + + assert length(Repo.all(Object)) == 4 + + Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"]) + + assert length(Repo.all(Object)) == 4 + end + + test "with the --keep-threads option it keeps old threads with bookmarked posts" do + deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) + 1 + + old_insert_date = + Timex.now() + |> Timex.shift(days: -deadline) + |> Timex.to_naive_datetime() + |> NaiveDateTime.truncate(:second) + + remote_user = insert(:user, local: false) + local_user = insert(:user, local: true) + + {:ok, old_remote_post_activity} = + CommonAPI.post(remote_user, %{status: "some thing", local: 
false}) + + old_remote_post_activity + |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date}) + |> Repo.update!() + + Pleroma.Bookmark.create(local_user.id, old_remote_post_activity.id) + + assert length(Repo.all(Object)) == 1 + + Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"]) + + assert length(Repo.all(Object)) == 1 + end end describe "running update_users_following_followers_counts" do -- 2.34.1 From 04cc1d41ce80a4a506b4ed0a6544fa4e1f1f40f6 Mon Sep 17 00:00:00 2001 From: ilja Date: Sat, 10 Dec 2022 19:29:04 +0100 Subject: [PATCH 2/4] Build prune_objects --keep-threads query with Ecto The query is now done using Ecto. I also ran it on a local DB. It Went from 4000834 records to 1734648 in about an hour without timeout. --- lib/mix/tasks/pleroma/database.ex | 40 ++++++++++++++----------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index 0e37a29f6..9862c5ecd 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -75,34 +75,30 @@ defmodule Mix.Tasks.Pleroma.Database do start_pleroma() deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) - - Logger.info("Pruning objects older than #{deadline} days") + time_deadline = NaiveDateTime.utc_now() |> NaiveDateTime.add(-(deadline * 86_400)) if Keyword.get(options, :keep_threads) do - # We delete objects from threads where + Logger.info( + "Pruning objects older than #{deadline} days without local interaction, keeping threads intact" + ) + + # We want to delete objects from threads where # 1. the newest post is still old # 2. none of the activities is local # 3. 
none of the activities is bookmarked - delete_keep_threads_statement = """ - delete - from public.objects o - where o.data ->> 'context' in ( - select - a.data ->> 'context' - from public.activities a - left join public.bookmarks b on a.id = b.activity_id - group by (a.data ->> 'context'::text) - having max(a.updated_at) < now() - interval '#{deadline} day' - and not bool_or(a.local) - and max(b.id) is null - ); - """ + deletable_context = + Pleroma.Activity + |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id) + |> group_by([a], fragment("? ->> 'context'::text", a.data)) + |> having([a], max(a.updated_at) < ^time_deadline) + |> having([a], not fragment("bool_or(?)", a.local)) + |> having([a, b], fragment("max(?::text) is null", b.id)) + |> select([a], fragment("? ->> 'context'::text", a.data)) - Repo.query(delete_keep_threads_statement) + Pleroma.Object + |> where([o], fragment("? ->> 'context'::text", o.data) in subquery(deletable_context)) else - time_deadline = - NaiveDateTime.utc_now() - |> NaiveDateTime.add(-(deadline * 86_400)) + Logger.info("Pruning objects older than #{deadline} days") from(o in Object, where: @@ -117,8 +113,8 @@ defmodule Mix.Tasks.Pleroma.Database do where: fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host()) ) - |> Repo.delete_all(timeout: :infinity) end + |> Repo.delete_all(timeout: :infinity) prune_hashtags_query = """ DELETE FROM hashtags AS ht -- 2.34.1 From 92d2f8b401c9820546ea4330d9d5e3b0c0825019 Mon Sep 17 00:00:00 2001 From: ilja Date: Sun, 11 Dec 2022 14:08:07 +0100 Subject: [PATCH 3/4] Add --keep-non-public option The prune_objects task already did this by default, but is undocumented. Now we require an explicit parameter for it. 
The parameter also works in combination with --keep-threads Docs still needs to happen --- lib/mix/tasks/pleroma/database.ex | 73 ++++++++++--- test/mix/tasks/pleroma/database_test.exs | 127 +++++++++++++++++++++-- 2 files changed, 177 insertions(+), 23 deletions(-) diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index 9862c5ecd..be59e2271 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -68,7 +68,8 @@ defmodule Mix.Tasks.Pleroma.Database do args, strict: [ vacuum: :boolean, - keep_threads: :boolean + keep_threads: :boolean, + keep_non_public: :boolean ] ) @@ -77,41 +78,79 @@ defmodule Mix.Tasks.Pleroma.Database do deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) time_deadline = NaiveDateTime.utc_now() |> NaiveDateTime.add(-(deadline * 86_400)) - if Keyword.get(options, :keep_threads) do - Logger.info( - "Pruning objects older than #{deadline} days without local interaction, keeping threads intact" - ) + log_message = "Pruning objects older than #{deadline} days" + log_message = + if Keyword.get(options, :keep_non_public) do + log_message <> ", keeping non public posts" + else + log_message + end + + log_message = + if Keyword.get(options, :keep_threads) do + log_message <> ", keeping threads intact" + else + log_message + end + + Logger.info(log_message) + + if Keyword.get(options, :keep_threads) do # We want to delete objects from threads where # 1. the newest post is still old # 2. none of the activities is local # 3. none of the activities is bookmarked + # 4. optionally none of the posts is non-public deletable_context = - Pleroma.Activity - |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id) - |> group_by([a], fragment("? ->> 'context'::text", a.data)) + if Keyword.get(options, :keep_non_public) do + Pleroma.Activity + |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id) + |> group_by([a], fragment("? 
->> 'context'::text", a.data)) + |> having( + [a], + not fragment( + # Posts (checked on Create Activity) is non-public + "bool_or((not(?->'to' \\? ? OR ?->'cc' \\? ?)) and ? ->> 'type' = 'Create')", + a.data, + ^Pleroma.Constants.as_public(), + a.data, + ^Pleroma.Constants.as_public(), + a.data + ) + ) + else + Pleroma.Activity + |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id) + |> group_by([a], fragment("? ->> 'context'::text", a.data)) + end |> having([a], max(a.updated_at) < ^time_deadline) |> having([a], not fragment("bool_or(?)", a.local)) - |> having([a, b], fragment("max(?::text) is null", b.id)) + |> having([_, b], fragment("max(?::text) is null", b.id)) |> select([a], fragment("? ->> 'context'::text", a.data)) Pleroma.Object |> where([o], fragment("? ->> 'context'::text", o.data) in subquery(deletable_context)) else - Logger.info("Pruning objects older than #{deadline} days") - - from(o in Object, - where: + if Keyword.get(options, :keep_non_public) do + Pleroma.Object + |> where( + [o], fragment( "?->'to' \\? ? OR ?->'cc' \\? 
?", o.data, ^Pleroma.Constants.as_public(), o.data, ^Pleroma.Constants.as_public() - ), - where: o.inserted_at < ^time_deadline, - where: - fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host()) + ) + ) + else + Pleroma.Object + end + |> where([o], o.updated_at < ^time_deadline) + |> where( + [o], + fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host()) ) end |> Repo.delete_all(timeout: :infinity) diff --git a/test/mix/tasks/pleroma/database_test.exs b/test/mix/tasks/pleroma/database_test.exs index 07de9b448..447a4404e 100644 --- a/test/mix/tasks/pleroma/database_test.exs +++ b/test/mix/tasks/pleroma/database_test.exs @@ -46,7 +46,6 @@ defmodule Mix.Tasks.Pleroma.DatabaseTest do describe "prune_objects" do test "it prunes old objects from the database" do - insert(:note) deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) + 1 date = @@ -55,18 +54,134 @@ defmodule Mix.Tasks.Pleroma.DatabaseTest do |> Timex.to_naive_datetime() |> NaiveDateTime.truncate(:second) - %{id: id} = + insert(:note) + + %{id: note_remote_public_id} = :note |> insert() - |> Ecto.Changeset.change(%{inserted_at: date}) + |> Ecto.Changeset.change(%{updated_at: date}) |> Repo.update!() - assert length(Repo.all(Object)) == 2 + note_remote_non_public = + %{id: note_remote_non_public_id, data: note_remote_non_public_data} = + :note + |> insert() + + note_remote_non_public + |> Ecto.Changeset.change(%{ + updated_at: date, + data: note_remote_non_public_data |> update_in(["to"], fn _ -> [] end) + }) + |> Repo.update!() + + assert length(Repo.all(Object)) == 3 Mix.Tasks.Pleroma.Database.run(["prune_objects"]) assert length(Repo.all(Object)) == 1 - refute Object.get_by_id(id) + refute Object.get_by_id(note_remote_public_id) + refute Object.get_by_id(note_remote_non_public_id) + end + + test "with the --keep-non-public option it still keeps non-public posts even if they are not local" do + deadline = 
Pleroma.Config.get([:instance, :remote_post_retention_days]) + 1 + + date = + Timex.now() + |> Timex.shift(days: -deadline) + |> Timex.to_naive_datetime() + |> NaiveDateTime.truncate(:second) + + insert(:note) + + %{id: note_remote_id} = + :note + |> insert() + |> Ecto.Changeset.change(%{updated_at: date}) + |> Repo.update!() + + note_remote_non_public = + %{data: note_remote_non_public_data} = + :note + |> insert() + + note_remote_non_public + |> Ecto.Changeset.change(%{ + updated_at: date, + data: note_remote_non_public_data |> update_in(["to"], fn _ -> [] end) + }) + |> Repo.update!() + + assert length(Repo.all(Object)) == 3 + + Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-non-public"]) + + assert length(Repo.all(Object)) == 2 + refute Object.get_by_id(note_remote_id) + end + + test "with the --keep-threads and --keep-non-public option it keeps old threads with non-public replies even if the interaction is not local" do + # For non-public we only check Create Activities because only these are relevant for threads + # Flags are always non-public, Announces from relays can be non-public... 
+ deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) + 1 + + old_insert_date = + Timex.now() + |> Timex.shift(days: -deadline) + |> Timex.to_naive_datetime() + |> NaiveDateTime.truncate(:second) + + remote_user1 = insert(:user, local: false) + remote_user2 = insert(:user, local: false) + + # Old remote non-public reply (should be kept) + {:ok, old_remote_post1_activity} = + CommonAPI.post(remote_user1, %{status: "some thing", local: false}) + + old_remote_post1_activity + |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date}) + |> Repo.update!() + + {:ok, old_remote_non_public_reply_activity} = + CommonAPI.post(remote_user2, %{ + status: "some reply", + in_reply_to_status_id: old_remote_post1_activity.id + }) + + old_remote_non_public_reply_activity + |> Ecto.Changeset.change(%{ + local: false, + updated_at: old_insert_date, + data: old_remote_non_public_reply_activity.data |> update_in(["to"], fn _ -> [] end) + }) + |> Repo.update!() + + # Old remote non-public Announce (should be removed) + {:ok, old_remote_post2_activity = %{data: %{"object" => old_remote_post2_id}}} = + CommonAPI.post(remote_user1, %{status: "some thing", local: false}) + + old_remote_post2_activity + |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date}) + |> Repo.update!() + + {:ok, old_remote_non_public_repeat_activity} = + CommonAPI.repeat(old_remote_post2_activity.id, remote_user2) + + old_remote_non_public_repeat_activity + |> Ecto.Changeset.change(%{ + local: false, + updated_at: old_insert_date, + data: old_remote_non_public_repeat_activity.data |> update_in(["to"], fn _ -> [] end) + }) + |> Repo.update!() + + assert length(Repo.all(Object)) == 3 + + Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads", "--keep-non-public"]) + + Repo.all(Pleroma.Activity) + assert length(Repo.all(Object)) == 2 + refute Object.get_by_ap_id(old_remote_post2_id) end test "with the --keep-threads option it still keeps non-old threads even 
with no local interactions" do @@ -143,7 +258,7 @@ defmodule Mix.Tasks.Pleroma.DatabaseTest do Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"]) - assert Repo.all(Object) == [] + assert length(Repo.all(Object)) == 0 end test "with the --keep-threads option it keeps old threads with local interaction" do -- 2.34.1 From f1739ac17bb87331d0d77208c735f6eefab15246 Mon Sep 17 00:00:00 2001 From: ilja Date: Wed, 4 Jan 2023 19:19:07 +0100 Subject: [PATCH 4/4] Adapt docs for prune_objects --- CHANGELOG.md | 1 + docs/docs/administration/CLI_tasks/database.md | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e638bdd8..c3e88f071 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Admin scopes will be dropped on create - Rich media will now backoff for 20 minutes after a failure - Quote posts are now considered as part of the same thread as the post they are quoting +- Extend the mix task `prune_objects` with options to keep more relevant posts - Simplified HTTP signature processing - Rich media will now hard-exit after 5 seconds, to prevent timeline hangs - HTTP Content Security Policy is now far more strict to prevent any potential XSS/CSS leakages diff --git a/docs/docs/administration/CLI_tasks/database.md b/docs/docs/administration/CLI_tasks/database.md index 73419dc81..915139cf7 100644 --- a/docs/docs/administration/CLI_tasks/database.md +++ b/docs/docs/administration/CLI_tasks/database.md @@ -27,7 +27,7 @@ Replaces embedded objects with references to them in the `objects` table. Only n ## Prune old remote posts from the database -This will prune remote posts older than 90 days (configurable with [`config :pleroma, :instance, remote_post_retention_days`](../../configuration/cheatsheet.md#instance)) from the database, they will be refetched from source when accessed. 
+This will prune remote posts older than 90 days (configurable with [`config :pleroma, :instance, remote_post_retention_days`](../../configuration/cheatsheet.md#instance)) from the database. Pruned posts may be refetched in some cases. !!! danger The disk space will only be reclaimed after `VACUUM FULL`. You may run out of disk space during the execution of the task or vacuuming if you don't have about 1/3rds of the database size free. @@ -45,6 +45,9 @@ This will prune remote posts older than 90 days (configurable with [`config :ple ``` ### Options + +- `--keep-threads` - don't prune posts when they are part of a thread where at least one post has seen local interaction (e.g. one of the posts is a local post, or is favourited by a local user, or has been repeated by a local user...) +- `--keep-non-public` - keep non-public posts like DM's and followers-only, even if they are remote - `--vacuum` - run `VACUUM FULL` after the objects are pruned ## Create a conversation for all existing DMs @@ -178,4 +181,4 @@ to the current day. ```sh mix pleroma.database prune_task - ``` \ No newline at end of file + ``` -- 2.34.1