From 4f69d63ba13b02261dd39effce3ede2dbbb38d9f Mon Sep 17 00:00:00 2001 From: Oneric Date: Sun, 2 Jun 2024 21:42:36 +0200 Subject: [PATCH] Don't try to cleanup remote attachments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cleanup attachment worker was run for every deleted post, even if it’s a remote post whose attachments we don't even store. This was especially bad due to attachment cleanup involving a particularly heavy query wasting a bunch of database perf for nil. This was uncovered by comparing statistics from https://akkoma.dev/AkkomaGang/akkoma/issues/784 and https://akkoma.dev/AkkomaGang/akkoma/issues/765#issuecomment-12256 --- lib/pleroma/object.ex | 15 +---- .../workers/attachments_cleanup_worker.ex | 49 ++++++++++++--- .../attachments_cleanup_worker_test.exs | 60 +++++++++++++++++++ 3 files changed, 101 insertions(+), 23 deletions(-) create mode 100644 test/pleroma/workers/attachments_cleanup_worker_test.exs diff --git a/lib/pleroma/object.ex b/lib/pleroma/object.ex index 379b361f8..5d84bb286 100644 --- a/lib/pleroma/object.ex +++ b/lib/pleroma/object.ex @@ -9,7 +9,6 @@ defmodule Pleroma.Object do import Ecto.Changeset alias Pleroma.Activity - alias Pleroma.Config alias Pleroma.Hashtag alias Pleroma.Object alias Pleroma.Object.Fetcher @@ -241,23 +240,11 @@ defmodule Pleroma.Object do with {:ok, _obj} = swap_object_with_tombstone(object), deleted_activity = Activity.delete_all_by_object_ap_id(id), {:ok, _} <- invalid_object_cache(object) do - cleanup_attachments( - Config.get([:instance, :cleanup_attachments]), - %{object: object} - ) - + AttachmentsCleanupWorker.enqueue_if_needed(object.data) {:ok, object, deleted_activity} end end - @spec cleanup_attachments(boolean(), %{required(:object) => map()}) :: - {:ok, Oban.Job.t() | nil} - def cleanup_attachments(true, %{object: _} = params) do - AttachmentsCleanupWorker.enqueue("cleanup_attachments", params) - end - - def cleanup_attachments(_, _), do: {:ok, nil} - def prune(%Object{data: %{"id" => _id}} = object) do with {:ok, object} <- Repo.delete(object), {:ok, _} <- invalid_object_cache(object) do diff --git a/lib/pleroma/workers/attachments_cleanup_worker.ex b/lib/pleroma/workers/attachments_cleanup_worker.ex index f5090dae7..58bbda94b 100644 --- a/lib/pleroma/workers/attachments_cleanup_worker.ex +++ b/lib/pleroma/workers/attachments_cleanup_worker.ex @@ -5,30 +5,61 @@ defmodule Pleroma.Workers.AttachmentsCleanupWorker do import Ecto.Query + alias Pleroma.Config alias Pleroma.Object alias Pleroma.Repo use Pleroma.Workers.WorkerHelper, queue: "attachments_cleanup" + @doc """ + Takes object data and if necessary enqueues a job, + deleting all attachments of the post eligible for cleanup + """ + @spec enqueue_if_needed(map()) :: {:ok, Oban.Job.t()} | {:ok, :skip} | {:error, any()} + def enqueue_if_needed(%{ + "actor" => actor, + "attachment" => [_ | _] = attachments + }) do + with true <- Config.get([:instance, :cleanup_attachments]), + true <- URI.parse(actor).host == Pleroma.Web.Endpoint.host(), + [_ | _] <- attachments do + enqueue("cleanup_attachments", %{"actor" => actor, "attachments" => attachments}) + else + _ -> {:ok, :skip} + end + end + + def enqueue_if_needed(_), do: {:ok, :skip} + @impl Oban.Worker def perform(%Job{ args: %{ "op" => "cleanup_attachments", - "object" => %{"data" => %{"attachment" => [_ | _] = attachments, "actor" => actor}} + "attachments" => [_ | _] = attachments, + "actor" => actor } }) do - if Pleroma.Config.get([:instance, :cleanup_attachments], false) do - attachments - |> Enum.flat_map(fn item -> Enum.map(item["url"], & &1["href"]) end) - |> fetch_objects - |> prepare_objects(actor, Enum.map(attachments, & &1["name"])) - |> filter_objects - |> do_clean - end + attachments + |> Enum.flat_map(fn item -> Enum.map(item["url"], & &1["href"]) end) + |> fetch_objects + |> prepare_objects(actor, Enum.map(attachments, & &1["name"])) + |> filter_objects + |> do_clean {:ok, :success} end + # Left over already enqueued jobs in the old format + # This function clause can be deleted once sufficient time passed after 3.14 + def perform(%Job{ + args: %{ + "op" => "cleanup_attachments", + "object" => %{"data" => data} + } + }) do + enqueue_if_needed(data) + end + def perform(%Job{args: %{"op" => "cleanup_attachments", "object" => _object}}), do: {:ok, :skip} defp do_clean({object_ids, attachment_urls}) do diff --git a/test/pleroma/workers/attachments_cleanup_worker_test.exs b/test/pleroma/workers/attachments_cleanup_worker_test.exs new file mode 100644 index 000000000..2212db927 --- /dev/null +++ b/test/pleroma/workers/attachments_cleanup_worker_test.exs @@ -0,0 +1,60 @@ +# Akkoma: Magically expressive social media +# Copyright © 2024 Akkoma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Workers.AttachmentsCleanupWorkerTest do + use Pleroma.DataCase, async: false + use Oban.Testing, repo: Pleroma.Repo + + import Pleroma.Factory + + alias Pleroma.Workers.AttachmentsCleanupWorker + + setup do + clear_config([:instance, :cleanup_attachments], true) + + file = %Plug.Upload{ + content_type: "image/jpeg", + path: Path.absname("test/fixtures/image.jpg"), + filename: "an_image.jpg" + } + + user = insert(:user) + + {:ok, %Pleroma.Object{} = attachment} = + Pleroma.Web.ActivityPub.ActivityPub.upload(file, actor: user.ap_id) + + {:ok, attachment: attachment, user: user} + end + + test "does not enqueue remote post" do + remote_data = %{ + "id" => "https://remote.example/obj/123", + "actor" => "https://remote.example/user/1", + "content" => "content", + "attachment" => [ + %{ + "type" => "Document", + "mediaType" => "image/png", + "name" => "marvellous image", + "url" => "https://remote.example/files/image.png" + } + ] + } + + assert {:ok, :skip} = AttachmentsCleanupWorker.enqueue_if_needed(remote_data) + end + + test "enqueues local post", %{attachment: attachment, user: user} do + local_url = Pleroma.Web.Endpoint.url() + + local_data = %{ + "id" => local_url <> "/obj/123", + "actor" => user.ap_id, + "content" => "content", + "attachment" => [attachment.data] + } + + assert {:ok, %Oban.Job{}} = AttachmentsCleanupWorker.enqueue_if_needed(local_data) + end +end