Add attachment cleanup patches

This commit is contained in:
Oneric 2024-10-03 21:08:34 +02:00
parent d4023cebd5
commit 22a486383d
3 changed files with 312 additions and 0 deletions

View file

@ -0,0 +1,201 @@
From da064d63b86a00434d16ace6217f18af7d18d5e1 Mon Sep 17 00:00:00 2001
From: Oneric <oneric@oneric.stub>
Date: Sun, 2 Jun 2024 21:42:36 +0200
Subject: [PATCH] Don't try to cleanup remote attachments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The cleanup attachment worker was run for every deleted post,
even if its a remote post whose attachments we don't even store.
This was especially bad due to attachment cleanup involving a
particularly heavy query wasting a bunch of database perf for nil.
This was uncovered by comparing statistics from
https://akkoma.dev/AkkomaGang/akkoma/issues/784 and
https://akkoma.dev/AkkomaGang/akkoma/issues/765#issuecomment-12256
---
lib/pleroma/object.ex | 15 +----
.../workers/attachments_cleanup_worker.ex | 49 ++++++++++++---
.../attachments_cleanup_worker_test.exs | 60 +++++++++++++++++++
3 files changed, 101 insertions(+), 23 deletions(-)
create mode 100644 test/pleroma/workers/attachments_cleanup_worker_test.exs
diff --git a/lib/pleroma/object.ex b/lib/pleroma/object.ex
index 379b361f8..5d84bb286 100644
--- a/lib/pleroma/object.ex
+++ b/lib/pleroma/object.ex
@@ -9,7 +9,6 @@ defmodule Pleroma.Object do
import Ecto.Changeset
alias Pleroma.Activity
- alias Pleroma.Config
alias Pleroma.Hashtag
alias Pleroma.Object
alias Pleroma.Object.Fetcher
@@ -241,23 +240,11 @@ defmodule Pleroma.Object do
with {:ok, _obj} = swap_object_with_tombstone(object),
deleted_activity = Activity.delete_all_by_object_ap_id(id),
{:ok, _} <- invalid_object_cache(object) do
- cleanup_attachments(
- Config.get([:instance, :cleanup_attachments]),
- %{object: object}
- )
-
+ AttachmentsCleanupWorker.enqueue_if_needed(object.data)
{:ok, object, deleted_activity}
end
end
- @spec cleanup_attachments(boolean(), %{required(:object) => map()}) ::
- {:ok, Oban.Job.t() | nil}
- def cleanup_attachments(true, %{object: _} = params) do
- AttachmentsCleanupWorker.enqueue("cleanup_attachments", params)
- end
-
- def cleanup_attachments(_, _), do: {:ok, nil}
-
def prune(%Object{data: %{"id" => _id}} = object) do
with {:ok, object} <- Repo.delete(object),
{:ok, _} <- invalid_object_cache(object) do
diff --git a/lib/pleroma/workers/attachments_cleanup_worker.ex b/lib/pleroma/workers/attachments_cleanup_worker.ex
index f5090dae7..58bbda94b 100644
--- a/lib/pleroma/workers/attachments_cleanup_worker.ex
+++ b/lib/pleroma/workers/attachments_cleanup_worker.ex
@@ -5,30 +5,61 @@
defmodule Pleroma.Workers.AttachmentsCleanupWorker do
import Ecto.Query
+ alias Pleroma.Config
alias Pleroma.Object
alias Pleroma.Repo
use Pleroma.Workers.WorkerHelper, queue: "attachments_cleanup"
+ @doc """
+ Takes object data and if necessary enqueues a job,
+ deleting all attachments of the post eligible for cleanup
+ """
+ @spec enqueue_if_needed(map()) :: {:ok, Oban.Job.t()} | {:ok, :skip} | {:error, any()}
+ def enqueue_if_needed(%{
+ "actor" => actor,
+ "attachment" => [_ | _] = attachments
+ }) do
+ with true <- Config.get([:instance, :cleanup_attachments]),
+ true <- URI.parse(actor).host == Pleroma.Web.Endpoint.host(),
+ [_ | _] <- attachments do
+ enqueue("cleanup_attachments", %{"actor" => actor, "attachments" => attachments})
+ else
+ _ -> {:ok, :skip}
+ end
+ end
+
+ def enqueue_if_needed(_), do: {:ok, :skip}
+
@impl Oban.Worker
def perform(%Job{
args: %{
"op" => "cleanup_attachments",
- "object" => %{"data" => %{"attachment" => [_ | _] = attachments, "actor" => actor}}
+ "attachments" => [_ | _] = attachments,
+ "actor" => actor
}
}) do
- if Pleroma.Config.get([:instance, :cleanup_attachments], false) do
- attachments
- |> Enum.flat_map(fn item -> Enum.map(item["url"], & &1["href"]) end)
- |> fetch_objects
- |> prepare_objects(actor, Enum.map(attachments, & &1["name"]))
- |> filter_objects
- |> do_clean
- end
+ attachments
+ |> Enum.flat_map(fn item -> Enum.map(item["url"], & &1["href"]) end)
+ |> fetch_objects
+ |> prepare_objects(actor, Enum.map(attachments, & &1["name"]))
+ |> filter_objects
+ |> do_clean
{:ok, :success}
end
+ # Left over already enqueued jobs in the old format
+ # This function clause can be deleted once sufficient time passed after 3.14
+ def perform(%Job{
+ args: %{
+ "op" => "cleanup_attachments",
+ "object" => %{"data" => data}
+ }
+ }) do
+ enqueue_if_needed(data)
+ end
+
def perform(%Job{args: %{"op" => "cleanup_attachments", "object" => _object}}), do: {:ok, :skip}
defp do_clean({object_ids, attachment_urls}) do
diff --git a/test/pleroma/workers/attachments_cleanup_worker_test.exs b/test/pleroma/workers/attachments_cleanup_worker_test.exs
new file mode 100644
index 000000000..2212db927
--- /dev/null
+++ b/test/pleroma/workers/attachments_cleanup_worker_test.exs
@@ -0,0 +1,60 @@
+# Akkoma: Magically expressive social media
+# Copyright © 2024 Akkoma Authors <https://akkoma.dev/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Workers.AttachmentsCleanupWorkerTest do
+ use Pleroma.DataCase, async: false
+ use Oban.Testing, repo: Pleroma.Repo
+
+ import Pleroma.Factory
+
+ alias Pleroma.Workers.AttachmentsCleanupWorker
+
+ setup do
+ clear_config([:instance, :cleanup_attachments], true)
+
+ file = %Plug.Upload{
+ content_type: "image/jpeg",
+ path: Path.absname("test/fixtures/image.jpg"),
+ filename: "an_image.jpg"
+ }
+
+ user = insert(:user)
+
+ {:ok, %Pleroma.Object{} = attachment} =
+ Pleroma.Web.ActivityPub.ActivityPub.upload(file, actor: user.ap_id)
+
+ {:ok, attachment: attachment, user: user}
+ end
+
+ test "does not enqueue remote post" do
+ remote_data = %{
+ "id" => "https://remote.example/obj/123",
+ "actor" => "https://remote.example/user/1",
+ "content" => "content",
+ "attachment" => [
+ %{
+ "type" => "Document",
+ "mediaType" => "image/png",
+ "name" => "marvellous image",
+ "url" => "https://remote.example/files/image.png"
+ }
+ ]
+ }
+
+ assert {:ok, :skip} = AttachmentsCleanupWorker.enqueue_if_needed(remote_data)
+ end
+
+ test "enqueues local post", %{attachment: attachment, user: user} do
+ local_url = Pleroma.Web.Endpoint.url()
+
+ local_data = %{
+ "id" => local_url <> "/obj/123",
+ "actor" => user.ap_id,
+ "content" => "content",
+ "attachment" => [attachment.data]
+ }
+
+ assert {:ok, %Oban.Job{}} = AttachmentsCleanupWorker.enqueue_if_needed(local_data)
+ end
+end

View file

@ -0,0 +1,107 @@
From 5e1a47ce703dc071943220c25c9d874c49408c4d Mon Sep 17 00:00:00 2001
From: Oneric <oneric@oneric.stub>
Date: Mon, 3 Jun 2024 23:07:10 +0200
Subject: [PATCH] Delay attachment deletion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Otherwise attachments have a high chance to disappear with akkoma-fes
“delete & redraft” feature when cleanup is enabled in the backend. Since
we don't know whether a deletion was intended to be part of a redraft
process or even if whether the redraft was abandoned we still have to
delete attachments eventually.
A thirty minute delay should provide sufficient time for redrafting.
Fixes: https://akkoma.dev/AkkomaGang/akkoma/issues/775
---
config/config.exs | 1 +
docs/docs/configuration/cheatsheet.md | 1 +
.../workers/attachments_cleanup_worker.ex | 6 ++++-
.../attachments_cleanup_worker_test.exs | 26 +++++++++++++++++++
5 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/config/config.exs b/config/config.exs
index e919910b3..39b53a010 100644
--- a/config/config.exs
+++ b/config/config.exs
@@ -255,6 +255,7 @@ config :pleroma, :instance,
external_user_synchronization: true,
extended_nickname_format: true,
cleanup_attachments: false,
+ cleanup_attachments_delay: 1800,
multi_factor_authentication: [
totp: [
# digits 6 or 8
diff --git a/docs/docs/configuration/cheatsheet.md b/docs/docs/configuration/cheatsheet.md
index 80f5c3577..bbd353d70 100644
--- a/docs/docs/configuration/cheatsheet.md
+++ b/docs/docs/configuration/cheatsheet.md
@@ -58,6 +58,7 @@ To add configuration to your config file, you can copy it from the base config.
* `registration_reason_length`: Maximum registration reason length (default: `500`).
* `external_user_synchronization`: Enabling following/followers counters synchronization for external users.
* `cleanup_attachments`: Remove attachments along with statuses. Does not affect duplicate files and attachments without status. Enabling this will increase load to database when deleting statuses on larger instances.
+* `cleanup_attachments_delay`: How many seconds to wait after post deletion before attempting to deletion; useful for “delete & redraft” functionality (default: `1800`)
* `show_reactions`: Let favourites and emoji reactions be viewed through the API (default: `true`).
* `password_reset_token_validity`: The time after which reset tokens aren't accepted anymore, in seconds (default: one day).
* `local_bubble`: Array of domains representing instances closely related to yours. Used to populate the `bubble` timeline. e.g `["example.com"]`, (default: `[]`)
diff --git a/lib/pleroma/workers/attachments_cleanup_worker.ex b/lib/pleroma/workers/attachments_cleanup_worker.ex
index 58bbda94b..f1204a861 100644
--- a/lib/pleroma/workers/attachments_cleanup_worker.ex
+++ b/lib/pleroma/workers/attachments_cleanup_worker.ex
@@ -23,7 +23,11 @@ defmodule Pleroma.Workers.AttachmentsCleanupWorker do
with true <- Config.get([:instance, :cleanup_attachments]),
true <- URI.parse(actor).host == Pleroma.Web.Endpoint.host(),
[_ | _] <- attachments do
- enqueue("cleanup_attachments", %{"actor" => actor, "attachments" => attachments})
+ enqueue(
+ "cleanup_attachments",
+ %{"actor" => actor, "attachments" => attachments},
+ schedule_in: Config.get!([:instance, :cleanup_attachments_delay])
+ )
else
_ -> {:ok, :skip}
end
diff --git a/test/pleroma/workers/attachments_cleanup_worker_test.exs b/test/pleroma/workers/attachments_cleanup_worker_test.exs
index 2212db927..d180763fb 100644
--- a/test/pleroma/workers/attachments_cleanup_worker_test.exs
+++ b/test/pleroma/workers/attachments_cleanup_worker_test.exs
@@ -8,7 +8,9 @@ defmodule Pleroma.Workers.AttachmentsCleanupWorkerTest do
import Pleroma.Factory
+ alias Pleroma.Object
alias Pleroma.Workers.AttachmentsCleanupWorker
+ alias Pleroma.Tests.ObanHelpers
setup do
clear_config([:instance, :cleanup_attachments], true)
@@ -57,4 +59,28 @@ defmodule Pleroma.Workers.AttachmentsCleanupWorkerTest do
assert {:ok, %Oban.Job{}} = AttachmentsCleanupWorker.enqueue_if_needed(local_data)
end
+
+ test "doesn't delete immediately", %{attachment: attachment, user: user} do
+ delay = 6000
+ clear_config([:instance, :cleanup_attachments_delay], delay)
+
+ note = insert(:note, %{user: user, data: %{"attachment" => [attachment.data]}})
+
+ uploads_dir = Pleroma.Config.get!([Pleroma.Uploaders.Local, :uploads])
+ %{"url" => [%{"href" => href}]} = attachment.data
+ path = "#{uploads_dir}/#{Path.basename(href)}"
+
+ assert File.exists?(path)
+
+ Object.delete(note)
+ Process.sleep(2000)
+
+ assert File.exists?(path)
+
+ ObanHelpers.perform(all_enqueued(worker: Pleroma.Workers.AttachmentsCleanupWorker))
+
+ assert Object.get_by_id(note.id).data["deleted"]
+ assert Object.get_by_id(attachment.id) == nil
+ refute File.exists?(path)
+ end
end

View file

@ -1 +1,5 @@
000_plant-a-forest.patch
# Attachment deletion is costly; don't waste resources on futile attempts
# and fix delete+redraft if cleanup is enabled
pr789_01_dont-attempt-to-delete-remote-attachs.patch
pr789_02_attach-deletion-grace-period.patch