From 34cb5b350c4b72bf443b1747070ca70942e24ec9 Mon Sep 17 00:00:00 2001 From: Oneric Date: Sun, 2 Jun 2024 21:42:36 +0200 Subject: [PATCH 1/2] [TMP] following commits depend on #789 this is https://akkoma.dev/AkkomaGang/akkoma/pulls/789 squashed into a single commit --- CHANGELOG.md | 2 + config/config.exs | 1 + docs/docs/configuration/cheatsheet.md | 1 + lib/pleroma/object.ex | 15 +--- .../workers/attachments_cleanup_worker.ex | 53 ++++++++++-- .../attachments_cleanup_worker_test.exs | 86 +++++++++++++++++++ 6 files changed, 135 insertions(+), 23 deletions(-) create mode 100644 test/pleroma/workers/attachments_cleanup_worker_test.exs diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c743e5bd..5a3e50d4d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,9 +14,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Meilisearch: it is now possible to use separate keys for search and admin actions - New standalone `prune_orphaned_activities` mix task with configurable batch limit - The `prune_objects` mix task now accepts a `--limit` parameter for initial object pruning +- New config option `:instance, :cleanup_attachments_delay` ## Fixed - Meilisearch: order of results returned from our REST API now actually matches how Meilisearch ranks results +- Fix “Delete & Redraft” often losing attachments if attachment cleanup was enabled ## Changed - Refactored Rich Media to cache the content in the database. Fetching operations that could block status rendering have been eliminated. 
diff --git a/config/config.exs b/config/config.exs index e919910b3..39b53a010 100644 --- a/config/config.exs +++ b/config/config.exs @@ -255,6 +255,7 @@ external_user_synchronization: true, extended_nickname_format: true, cleanup_attachments: false, + cleanup_attachments_delay: 1800, multi_factor_authentication: [ totp: [ # digits 6 or 8 diff --git a/docs/docs/configuration/cheatsheet.md b/docs/docs/configuration/cheatsheet.md index 80f5c3577..bbd353d70 100644 --- a/docs/docs/configuration/cheatsheet.md +++ b/docs/docs/configuration/cheatsheet.md @@ -58,6 +58,7 @@ To add configuration to your config file, you can copy it from the base config. * `registration_reason_length`: Maximum registration reason length (default: `500`). * `external_user_synchronization`: Enabling following/followers counters synchronization for external users. * `cleanup_attachments`: Remove attachments along with statuses. Does not affect duplicate files and attachments without status. Enabling this will increase load to database when deleting statuses on larger instances. +* `cleanup_attachments_delay`: How many seconds to wait after post deletion before attempting to delete its attachments; useful for “delete & redraft” functionality (default: `1800`) * `show_reactions`: Let favourites and emoji reactions be viewed through the API (default: `true`). * `password_reset_token_validity`: The time after which reset tokens aren't accepted anymore, in seconds (default: one day). * `local_bubble`: Array of domains representing instances closely related to yours. Used to populate the `bubble` timeline. 
e.g `["example.com"]`, (default: `[]`) diff --git a/lib/pleroma/object.ex b/lib/pleroma/object.ex index 379b361f8..5d84bb286 100644 --- a/lib/pleroma/object.ex +++ b/lib/pleroma/object.ex @@ -9,7 +9,6 @@ defmodule Pleroma.Object do import Ecto.Changeset alias Pleroma.Activity - alias Pleroma.Config alias Pleroma.Hashtag alias Pleroma.Object alias Pleroma.Object.Fetcher @@ -241,23 +240,11 @@ def delete(%Object{data: %{"id" => id}} = object) do with {:ok, _obj} = swap_object_with_tombstone(object), deleted_activity = Activity.delete_all_by_object_ap_id(id), {:ok, _} <- invalid_object_cache(object) do - cleanup_attachments( - Config.get([:instance, :cleanup_attachments]), - %{object: object} - ) - + AttachmentsCleanupWorker.enqueue_if_needed(object.data) {:ok, object, deleted_activity} end end - @spec cleanup_attachments(boolean(), %{required(:object) => map()}) :: - {:ok, Oban.Job.t() | nil} - def cleanup_attachments(true, %{object: _} = params) do - AttachmentsCleanupWorker.enqueue("cleanup_attachments", params) - end - - def cleanup_attachments(_, _), do: {:ok, nil} - def prune(%Object{data: %{"id" => _id}} = object) do with {:ok, object} <- Repo.delete(object), {:ok, _} <- invalid_object_cache(object) do diff --git a/lib/pleroma/workers/attachments_cleanup_worker.ex b/lib/pleroma/workers/attachments_cleanup_worker.ex index f5090dae7..f1204a861 100644 --- a/lib/pleroma/workers/attachments_cleanup_worker.ex +++ b/lib/pleroma/workers/attachments_cleanup_worker.ex @@ -5,30 +5,65 @@ defmodule Pleroma.Workers.AttachmentsCleanupWorker do import Ecto.Query + alias Pleroma.Config alias Pleroma.Object alias Pleroma.Repo use Pleroma.Workers.WorkerHelper, queue: "attachments_cleanup" + @doc """ + Takes object data and if necessary enqueues a job, + deleting all attachments of the post eligible for cleanup + """ + @spec enqueue_if_needed(map()) :: {:ok, Oban.Job.t()} | {:ok, :skip} | {:error, any()} + def enqueue_if_needed(%{ + "actor" => actor, + "attachment" => [_ | _] = 
attachments + }) do + with true <- Config.get([:instance, :cleanup_attachments]), + true <- URI.parse(actor).host == Pleroma.Web.Endpoint.host(), + [_ | _] <- attachments do + enqueue( + "cleanup_attachments", + %{"actor" => actor, "attachments" => attachments}, + schedule_in: Config.get!([:instance, :cleanup_attachments_delay]) + ) + else + _ -> {:ok, :skip} + end + end + + def enqueue_if_needed(_), do: {:ok, :skip} + @impl Oban.Worker def perform(%Job{ args: %{ "op" => "cleanup_attachments", - "object" => %{"data" => %{"attachment" => [_ | _] = attachments, "actor" => actor}} + "attachments" => [_ | _] = attachments, + "actor" => actor } }) do - if Pleroma.Config.get([:instance, :cleanup_attachments], false) do - attachments - |> Enum.flat_map(fn item -> Enum.map(item["url"], & &1["href"]) end) - |> fetch_objects - |> prepare_objects(actor, Enum.map(attachments, & &1["name"])) - |> filter_objects - |> do_clean - end + attachments + |> Enum.flat_map(fn item -> Enum.map(item["url"], & &1["href"]) end) + |> fetch_objects + |> prepare_objects(actor, Enum.map(attachments, & &1["name"])) + |> filter_objects + |> do_clean {:ok, :success} end + # Left over already enqueued jobs in the old format + # This function clause can be deleted once sufficient time passed after 3.14 + def perform(%Job{ + args: %{ + "op" => "cleanup_attachments", + "object" => %{"data" => data} + } + }) do + enqueue_if_needed(data) + end + def perform(%Job{args: %{"op" => "cleanup_attachments", "object" => _object}}), do: {:ok, :skip} defp do_clean({object_ids, attachment_urls}) do diff --git a/test/pleroma/workers/attachments_cleanup_worker_test.exs b/test/pleroma/workers/attachments_cleanup_worker_test.exs new file mode 100644 index 000000000..d180763fb --- /dev/null +++ b/test/pleroma/workers/attachments_cleanup_worker_test.exs @@ -0,0 +1,86 @@ +# Akkoma: Magically expressive social media +# Copyright © 2024 Akkoma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule 
Pleroma.Workers.AttachmentsCleanupWorkerTest do + use Pleroma.DataCase, async: false + use Oban.Testing, repo: Pleroma.Repo + + import Pleroma.Factory + + alias Pleroma.Object + alias Pleroma.Workers.AttachmentsCleanupWorker + alias Pleroma.Tests.ObanHelpers + + setup do + clear_config([:instance, :cleanup_attachments], true) + + file = %Plug.Upload{ + content_type: "image/jpeg", + path: Path.absname("test/fixtures/image.jpg"), + filename: "an_image.jpg" + } + + user = insert(:user) + + {:ok, %Pleroma.Object{} = attachment} = + Pleroma.Web.ActivityPub.ActivityPub.upload(file, actor: user.ap_id) + + {:ok, attachment: attachment, user: user} + end + + test "does not enqueue remote post" do + remote_data = %{ + "id" => "https://remote.example/obj/123", + "actor" => "https://remote.example/user/1", + "content" => "content", + "attachment" => [ + %{ + "type" => "Document", + "mediaType" => "image/png", + "name" => "marvellous image", + "url" => "https://remote.example/files/image.png" + } + ] + } + + assert {:ok, :skip} = AttachmentsCleanupWorker.enqueue_if_needed(remote_data) + end + + test "enqueues local post", %{attachment: attachment, user: user} do + local_url = Pleroma.Web.Endpoint.url() + + local_data = %{ + "id" => local_url <> "/obj/123", + "actor" => user.ap_id, + "content" => "content", + "attachment" => [attachment.data] + } + + assert {:ok, %Oban.Job{}} = AttachmentsCleanupWorker.enqueue_if_needed(local_data) + end + + test "doesn't delete immediately", %{attachment: attachment, user: user} do + delay = 6000 + clear_config([:instance, :cleanup_attachments_delay], delay) + + note = insert(:note, %{user: user, data: %{"attachment" => [attachment.data]}}) + + uploads_dir = Pleroma.Config.get!([Pleroma.Uploaders.Local, :uploads]) + %{"url" => [%{"href" => href}]} = attachment.data + path = "#{uploads_dir}/#{Path.basename(href)}" + + assert File.exists?(path) + + Object.delete(note) + Process.sleep(2000) + + assert File.exists?(path) + + 
ObanHelpers.perform(all_enqueued(worker: Pleroma.Workers.AttachmentsCleanupWorker)) + + assert Object.get_by_id(note.id).data["deleted"] + assert Object.get_by_id(attachment.id) == nil + refute File.exists?(path) + end +end -- 2.43.0 From 219fffa0c39161c8566d382b46a112a2de92879c Mon Sep 17 00:00:00 2001 From: Oneric Date: Sun, 2 Jun 2024 21:54:06 +0200 Subject: [PATCH 2/2] Allow deleting uploaded files after media migration Until now only the current base_url was checked. After a media domain migration all pre-existing files thus turned undeletable. Fix this with a new config option allowing to list all old media base urls. (This may also come in handy for a later db refactor, see https://akkoma.dev/AkkomaGang/akkoma/issues/765) --- CHANGELOG.md | 2 ++ config/config.exs | 1 + docs/docs/configuration/cheatsheet.md | 1 + .../workers/attachments_cleanup_worker.ex | 35 +++++++++++++++---- .../attachments_cleanup_worker_test.exs | 21 +++++++++++ 5 files changed, 54 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a3e50d4d..35d787a02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,10 +15,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - New standalone `prune_orphaned_activities` mix task with configurable batch limit - The `prune_objects` mix task now accepts a `--limit` parameter for initial object pruning - New config option `:instance, :cleanup_attachments_delay` +- New config option `Pleroma.Upload, :all_base_urls` ## Fixed - Meilisearch: order of results returned from our REST API now actually matches how Meilisearch ranks results - Fix “Delete & Redraft” often losing attachments if attachment cleanup was enabled +- If `Pleroma.Upload, :all_base_urls` is set accordingly, uploaded files can now be deleted after a domain migration. ## Changed - Refactored Rich Media to cache the content in the database. Fetching operations that could block status rendering have been eliminated. 
diff --git a/config/config.exs b/config/config.exs index 39b53a010..22db62f07 100644 --- a/config/config.exs +++ b/config/config.exs @@ -65,6 +65,7 @@ link_name: false, filename_display_max_length: 30, base_url: nil, + all_base_urls: nil, allowed_mime_types: ["image", "audio", "video"] config :pleroma, Pleroma.Uploaders.Local, uploads: "uploads" diff --git a/docs/docs/configuration/cheatsheet.md b/docs/docs/configuration/cheatsheet.md index bbd353d70..dd38535ae 100644 --- a/docs/docs/configuration/cheatsheet.md +++ b/docs/docs/configuration/cheatsheet.md @@ -606,6 +606,7 @@ the source code is here: [kocaptcha](https://github.com/koto-bank/kocaptcha). Th * `link_name`: When enabled Akkoma will add a `name` parameter to the url of the upload, for example `https://instance.tld/media/corndog.png?name=corndog.png`. This is needed to provide the correct filename in Content-Disposition headers * `base_url`: The base URL to access a user-uploaded file; MUST be configured explicitly. Using a (sub)domain distinct from the instance endpoint is **strongly** recommended. A good value might be `https://media.myakkoma.instance/media/`. +* `all_base_urls`: list of all base urls ever used *(**both** current and past)*; if unset defaults to a single-entry list containing the current `base_url` * `proxy_opts`: Proxy options, see `Pleroma.ReverseProxy` documentation. * `filename_display_max_length`: Set max length of a filename to display. 0 = no limit. Default: 30. 
diff --git a/lib/pleroma/workers/attachments_cleanup_worker.ex b/lib/pleroma/workers/attachments_cleanup_worker.ex index f1204a861..558c59755 100644 --- a/lib/pleroma/workers/attachments_cleanup_worker.ex +++ b/lib/pleroma/workers/attachments_cleanup_worker.ex @@ -22,6 +22,7 @@ def enqueue_if_needed(%{ }) do with true <- Config.get([:instance, :cleanup_attachments]), true <- URI.parse(actor).host == Pleroma.Web.Endpoint.host(), + attachments <- Enum.filter(attachments, &deletable_attachment/1), [_ | _] <- attachments do enqueue( "cleanup_attachments", @@ -35,6 +36,18 @@ def enqueue_if_needed(%{ def enqueue_if_needed(_), do: {:ok, :skip} + defp base_urls() do + Config.get([Pleroma.Upload, :all_base_urls]) || + [Config.get!([Pleroma.Upload, :base_url])] + end + + defp deletable_attachment(%{"id" => _id, "url" => [%{"href" => href} | _]}) do + # We can't delete files later if we can't strip the prefix + Enum.any?(base_urls(), fn url -> String.starts_with?(href, url) end) + end + + defp deletable_attachment(_), do: false + @impl Oban.Worker def perform(%Job{ args: %{ @@ -66,18 +79,28 @@ def perform(%Job{ def perform(%Job{args: %{"op" => "cleanup_attachments", "object" => _object}}), do: {:ok, :skip} + defp trim_first_leading(string, []), do: string + + defp trim_first_leading(string, [prefix | rest]) do + trimmed = String.trim_leading(string, prefix) + + if trimmed != string do + trimmed + else + trim_first_leading(string, rest) + end + end + defp do_clean({object_ids, attachment_urls}) do uploader = Pleroma.Config.get([Pleroma.Upload, :uploader]) - base_url = - String.trim_trailing( - Pleroma.Upload.base_url(), - "/" - ) + base_urls = + base_urls() + |> Enum.map(fn url -> String.trim_trailing(url, "/") end) Enum.each(attachment_urls, fn href -> href - |> String.trim_leading("#{base_url}") + |> trim_first_leading(base_urls) |> uploader.delete_file() end) diff --git a/test/pleroma/workers/attachments_cleanup_worker_test.exs 
b/test/pleroma/workers/attachments_cleanup_worker_test.exs index d180763fb..711492bb9 100644 --- a/test/pleroma/workers/attachments_cleanup_worker_test.exs +++ b/test/pleroma/workers/attachments_cleanup_worker_test.exs @@ -14,6 +14,7 @@ defmodule Pleroma.Workers.AttachmentsCleanupWorkerTest do setup do clear_config([:instance, :cleanup_attachments], true) + clear_config([Pleroma.Upload, :all_base_urls]) file = %Plug.Upload{ content_type: "image/jpeg", @@ -83,4 +84,24 @@ test "doesn't delete immediately", %{attachment: attachment, user: user} do assert Object.get_by_id(attachment.id) == nil refute File.exists?(path) end + + test "skips localpost with unmappable URLs", %{attachment: attachment, user: user} do + local_url = Pleroma.Web.Endpoint.url() + + attach_data = + attachment.data + |> Map.update!("url", fn + [%{"href" => _} = url | _] -> + [%{url | "href" => "https://oldmedia.example/files/123.png"}] + end) + + local_data = %{ + "id" => local_url <> "/obj/123", + "actor" => user.ap_id, + "content" => "content", + "attachment" => [attach_data] + } + + assert {:ok, :skip} = AttachmentsCleanupWorker.enqueue_if_needed(local_data) + end end -- 2.43.0