From c2b0b82e6a6d40f945feacc001ad984a17e23336 Mon Sep 17 00:00:00 2001 From: William Pitcock Date: Tue, 21 May 2019 00:41:40 +0000 Subject: [PATCH 1/5] object: add Object.prune() --- lib/pleroma/object.ex | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/pleroma/object.ex b/lib/pleroma/object.ex index 740d687a3..cc6fc9c5d 100644 --- a/lib/pleroma/object.ex +++ b/lib/pleroma/object.ex @@ -130,6 +130,13 @@ def delete(%Object{data: %{"id" => id}} = object) do end end + def prune(%Object{data: %{"id" => id}} = object) do + with {:ok, object} <- Repo.delete(object), + {:ok, true} <- Cachex.del(:object_cache, "object:#{id}") do + {:ok, object} + end + end + def set_cache(%Object{data: %{"id" => ap_id}} = object) do Cachex.put(:object_cache, "object:#{ap_id}", object) {:ok, object} From 73df9d690d5c1a9c11f0f04b8d877c0677022591 Mon Sep 17 00:00:00 2001 From: William Pitcock Date: Tue, 21 May 2019 00:41:58 +0000 Subject: [PATCH 2/5] object: fetcher: add support for reinjecting pruned objects --- lib/pleroma/object/fetcher.ex | 22 ++++++++++++++++++++-- test/object/fetcher_test.exs | 19 +++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/lib/pleroma/object/fetcher.ex b/lib/pleroma/object/fetcher.ex index 8d4bcc95e..bb9388d4f 100644 --- a/lib/pleroma/object/fetcher.ex +++ b/lib/pleroma/object/fetcher.ex @@ -8,6 +8,19 @@ defmodule Pleroma.Object.Fetcher do @httpoison Application.get_env(:pleroma, :httpoison) + defp reinject_object(data) do + Logger.debug("Reinjecting object #{data["id"]}") + + with data <- Transmogrifier.fix_object(data), + {:ok, object} <- Object.create(data) do + {:ok, object} + else + e -> + Logger.error("Error while processing object: #{inspect(e)}") + {:error, e} + end + end + # TODO: # This will create a Create activity, which we need internally at the moment. def fetch_object_from_id(id) do @@ -26,12 +39,17 @@ def fetch_object_from_id(id) do "object" => data }, :ok <- Containment.contain_origin(id, params), - {:ok, activity} <- Transmogrifier.handle_incoming(params) do - {:ok, Object.normalize(activity, false)} + {:ok, activity} <- Transmogrifier.handle_incoming(params), + {:object, _data, %Object{} = object} <- + {:object, data, Object.normalize(activity, false)} do + {:ok, object} else {:error, {:reject, nil}} -> {:reject, nil} + {:object, data, nil} -> + reinject_object(data) + object = %Object{} -> {:ok, object} diff --git a/test/object/fetcher_test.exs b/test/object/fetcher_test.exs index 72f616782..d604fd5f5 100644 --- a/test/object/fetcher_test.exs +++ b/test/object/fetcher_test.exs @@ -87,4 +87,23 @@ test "all objects with fake directions are rejected by the object fetcher" do ) end end + + describe "pruning" do + test "it can refetch pruned objects" do + object_id = "http://mastodon.example.org/@admin/99541947525187367" + + {:ok, object} = Fetcher.fetch_object_from_id(object_id) + + assert object + + {:ok, _object} = Object.prune(object) + + refute Object.get_by_ap_id(object_id) + + {:ok, %Object{} = object_two} = Fetcher.fetch_object_from_id(object_id) + + assert object.data["id"] == object_two.data["id"] + assert object.id != object_two.id + end + end end From 16b260fb19cca02463766c2e36a41bfcc823af9b Mon Sep 17 00:00:00 2001 From: William Pitcock Date: Tue, 21 May 2019 01:21:28 +0000 Subject: [PATCH 3/5] add mix task to prune the object database using a configured retention period --- config/config.exs | 3 ++- docs/config.md | 1 + lib/mix/tasks/pleroma/database.ex | 40 +++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/config/config.exs b/config/config.exs index 72908266d..466a6e9b7 100644 --- a/config/config.exs +++ b/config/config.exs @@ -239,7 +239,8 @@ welcome_message: nil, max_report_comment_size: 1000, safe_dm_mentions: false, - healthcheck: false + healthcheck: false, + remote_post_retention_days: 90 config :pleroma, :app_account_creation, enabled: true, max_requests: 25, interval: 1800 diff --git a/docs/config.md b/docs/config.md index 197326bbd..a050068f4 100644 --- a/docs/config.md +++ b/docs/config.md @@ -104,6 +104,7 @@ config :pleroma, Pleroma.Emails.Mailer, * `max_report_comment_size`: The maximum size of the report comment (Default: `1000`) * `safe_dm_mentions`: If set to true, only mentions at the beginning of a post will be used to address people in direct messages. This is to prevent accidental mentioning of people when talking about them (e.g. "@friend hey i really don't like @enemy"). (Default: `false`) * `healthcheck`: if set to true, system data will be shown on ``/api/pleroma/healthcheck``. +* `remote_post_retention_days`: the default amount of days to retain remote posts when pruning the database ## :app_account_creation REST API for creating an account settings diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index f650b447d..fdb216037 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -23,6 +23,10 @@ defmodule Mix.Tasks.Pleroma.Database do Options: - `--vacuum` - run `VACUUM FULL` after the embedded objects are replaced with their references + ## Prune old objects from the database + + mix pleroma.database prune_objects + ## Create a conversation for all existing DMs. Can be safely re-run. mix pleroma.database bump_all_conversations @@ -72,4 +76,40 @@ def run(["update_users_following_followers_counts"]) do Enum.each(users, &User.remove_duplicated_following/1) Enum.each(users, &User.update_follower_count/1) end + + def run(["prune_objects" | args]) do + {options, [], []} = + OptionParser.parse( + args, + strict: [ + vacuum: :boolean + ] + ) + + Common.start_pleroma() + + deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) + + Logger.info("Pruning objects older than #{deadline} days") + + time_deadline = + NaiveDateTime.utc_now() + |> NaiveDateTime.add(-(deadline * 86_400)) + + Repo.query!( + "DELETE FROM objects WHERE inserted_at < $1 AND split_part(data->>'actor', '/', 3) != $2", + [time_deadline, Pleroma.Web.Endpoint.host()], + timeout: :infinity + ) + + if Keyword.get(options, :vacuum) do + Logger.info("Runnning VACUUM FULL") + + Repo.query!( + "vacuum full;", + [], + timeout: :infinity + ) + end + end end From f446f94290a6cdae531859c2efa55f55dbaeb7e8 Mon Sep 17 00:00:00 2001 From: William Pitcock Date: Tue, 21 May 2019 01:22:27 +0000 Subject: [PATCH 4/5] add changelog entry for object pruning --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 256df91b7..a21c4bff2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Metadata: RelMe provider - OAuth: added support for refresh tokens - Emoji packs and emoji pack manager +- Object pruning (`mix pleroma.database prune_objects`) ### Changed - **Breaking:** Configuration: move from Pleroma.Mailer to Pleroma.Emails.Mailer From a023ca004cbd90e330cab35e4dfda16346d08668 Mon Sep 17 00:00:00 2001 From: William Pitcock Date: Wed, 22 May 2019 03:12:48 +0000 Subject: [PATCH 5/5] prune objects task: use Repo.delete_all() --- lib/mix/tasks/pleroma/database.ex | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index fdb216037..f9bafb277 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -5,6 +5,7 @@ defmodule Mix.Tasks.Pleroma.Database do alias Mix.Tasks.Pleroma.Common alias Pleroma.Conversation + alias Pleroma.Object alias Pleroma.Repo alias Pleroma.User require Logger @@ -78,6 +79,8 @@ def run(["update_users_following_followers_counts"]) do end def run(["prune_objects" | args]) do + import Ecto.Query + {options, [], []} = OptionParser.parse( args, @@ -96,11 +99,15 @@ def run(["prune_objects" | args]) do NaiveDateTime.utc_now() |> NaiveDateTime.add(-(deadline * 86_400)) - Repo.query!( - "DELETE FROM objects WHERE inserted_at < $1 AND split_part(data->>'actor', '/', 3) != $2", - [time_deadline, Pleroma.Web.Endpoint.host()], - timeout: :infinity + public = "https://www.w3.org/ns/activitystreams#Public" + + from(o in Object, + where: fragment("?->'to' \\? ? OR ?->'cc' \\? ?", o.data, ^public, o.data, ^public), + where: o.inserted_at < ^time_deadline, + where: + fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host()) ) + |> Repo.delete_all() if Keyword.get(options, :vacuum) do Logger.info("Runnning VACUUM FULL")