WIP: fix attachment cleanup after media domain migration #818
6 changed files with 189 additions and 29 deletions
|
@ -14,9 +14,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
||||||
- Meilisearch: it is now possible to use separate keys for search and admin actions
|
- Meilisearch: it is now possible to use separate keys for search and admin actions
|
||||||
- New standalone `prune_orphaned_activities` mix task with configurable batch limit
|
- New standalone `prune_orphaned_activities` mix task with configurable batch limit
|
||||||
- The `prune_objects` mix task now accepts a `--limit` parameter for initial object pruning
|
- The `prune_objects` mix task now accepts a `--limit` parameter for initial object pruning
|
||||||
|
- New config option `:instance, :cleanup_attachments_delay`
|
||||||
|
- New config option `Pleroma.Upload, :all_base_urls`
|
||||||
|
|
||||||
## Fixed
|
## Fixed
|
||||||
- Meilisearch: order of results returned from our REST API now actually matches how Meilisearch ranks results
|
- Meilisearch: order of results returned from our REST API now actually matches how Meilisearch ranks results
|
||||||
|
- Fix “Delete & Redraft” often losing attachments if attachment cleanup was enabled
|
||||||
|
- If `Pleroma.Upload, :all_base_urls` is set accordingly, uploaded files can now be deleted after a domain migration.
|
||||||
|
|
||||||
## Changed
|
## Changed
|
||||||
- Refactored Rich Media to cache the content in the database. Fetching operations that could block status rendering have been eliminated.
|
- Refactored Rich Media to cache the content in the database. Fetching operations that could block status rendering have been eliminated.
|
||||||
|
|
|
@ -65,6 +65,7 @@
|
||||||
link_name: false,
|
link_name: false,
|
||||||
filename_display_max_length: 30,
|
filename_display_max_length: 30,
|
||||||
base_url: nil,
|
base_url: nil,
|
||||||
|
all_base_urls: nil,
|
||||||
allowed_mime_types: ["image", "audio", "video"]
|
allowed_mime_types: ["image", "audio", "video"]
|
||||||
|
|
||||||
config :pleroma, Pleroma.Uploaders.Local, uploads: "uploads"
|
config :pleroma, Pleroma.Uploaders.Local, uploads: "uploads"
|
||||||
|
@ -255,6 +256,7 @@
|
||||||
external_user_synchronization: true,
|
external_user_synchronization: true,
|
||||||
extended_nickname_format: true,
|
extended_nickname_format: true,
|
||||||
cleanup_attachments: false,
|
cleanup_attachments: false,
|
||||||
|
cleanup_attachments_delay: 1800,
|
||||||
multi_factor_authentication: [
|
multi_factor_authentication: [
|
||||||
totp: [
|
totp: [
|
||||||
# digits 6 or 8
|
# digits 6 or 8
|
||||||
|
|
|
@ -58,6 +58,7 @@ To add configuration to your config file, you can copy it from the base config.
|
||||||
* `registration_reason_length`: Maximum registration reason length (default: `500`).
|
* `registration_reason_length`: Maximum registration reason length (default: `500`).
|
||||||
* `external_user_synchronization`: Enabling following/followers counters synchronization for external users.
|
* `external_user_synchronization`: Enabling following/followers counters synchronization for external users.
|
||||||
* `cleanup_attachments`: Remove attachments along with statuses. Does not affect duplicate files and attachments without status. Enabling this will increase load to database when deleting statuses on larger instances.
|
* `cleanup_attachments`: Remove attachments along with statuses. Does not affect duplicate files and attachments without status. Enabling this will increase load to database when deleting statuses on larger instances.
|
||||||
|
* `cleanup_attachments_delay`: How many seconds to wait after post deletion before attempting to deletion; useful for “delete & redraft” functionality (default: `1800`)
|
||||||
* `show_reactions`: Let favourites and emoji reactions be viewed through the API (default: `true`).
|
* `show_reactions`: Let favourites and emoji reactions be viewed through the API (default: `true`).
|
||||||
* `password_reset_token_validity`: The time after which reset tokens aren't accepted anymore, in seconds (default: one day).
|
* `password_reset_token_validity`: The time after which reset tokens aren't accepted anymore, in seconds (default: one day).
|
||||||
* `local_bubble`: Array of domains representing instances closely related to yours. Used to populate the `bubble` timeline. e.g `["example.com"]`, (default: `[]`)
|
* `local_bubble`: Array of domains representing instances closely related to yours. Used to populate the `bubble` timeline. e.g `["example.com"]`, (default: `[]`)
|
||||||
|
@ -605,6 +606,7 @@ the source code is here: [kocaptcha](https://github.com/koto-bank/kocaptcha). Th
|
||||||
* `link_name`: When enabled Akkoma will add a `name` parameter to the url of the upload, for example `https://instance.tld/media/corndog.png?name=corndog.png`. This is needed to provide the correct filename in Content-Disposition headers
|
* `link_name`: When enabled Akkoma will add a `name` parameter to the url of the upload, for example `https://instance.tld/media/corndog.png?name=corndog.png`. This is needed to provide the correct filename in Content-Disposition headers
|
||||||
* `base_url`: The base URL to access a user-uploaded file; MUST be configured explicitly.
|
* `base_url`: The base URL to access a user-uploaded file; MUST be configured explicitly.
|
||||||
Using a (sub)domain distinct from the instance endpoint is **strongly** recommended. A good value might be `https://media.myakkoma.instance/media/`.
|
Using a (sub)domain distinct from the instance endpoint is **strongly** recommended. A good value might be `https://media.myakkoma.instance/media/`.
|
||||||
|
* `all_base_url`: list of all base urls ever used *(**both** current and past)*; if unset defaults to a single-entry list containig the current `base_url`
|
||||||
* `proxy_opts`: Proxy options, see `Pleroma.ReverseProxy` documentation.
|
* `proxy_opts`: Proxy options, see `Pleroma.ReverseProxy` documentation.
|
||||||
* `filename_display_max_length`: Set max length of a filename to display. 0 = no limit. Default: 30.
|
* `filename_display_max_length`: Set max length of a filename to display. 0 = no limit. Default: 30.
|
||||||
|
|
||||||
|
|
|
@ -9,7 +9,6 @@ defmodule Pleroma.Object do
|
||||||
import Ecto.Changeset
|
import Ecto.Changeset
|
||||||
|
|
||||||
alias Pleroma.Activity
|
alias Pleroma.Activity
|
||||||
alias Pleroma.Config
|
|
||||||
alias Pleroma.Hashtag
|
alias Pleroma.Hashtag
|
||||||
alias Pleroma.Object
|
alias Pleroma.Object
|
||||||
alias Pleroma.Object.Fetcher
|
alias Pleroma.Object.Fetcher
|
||||||
|
@ -241,23 +240,11 @@ def delete(%Object{data: %{"id" => id}} = object) do
|
||||||
with {:ok, _obj} = swap_object_with_tombstone(object),
|
with {:ok, _obj} = swap_object_with_tombstone(object),
|
||||||
deleted_activity = Activity.delete_all_by_object_ap_id(id),
|
deleted_activity = Activity.delete_all_by_object_ap_id(id),
|
||||||
{:ok, _} <- invalid_object_cache(object) do
|
{:ok, _} <- invalid_object_cache(object) do
|
||||||
cleanup_attachments(
|
AttachmentsCleanupWorker.enqueue_if_needed(object.data)
|
||||||
Config.get([:instance, :cleanup_attachments]),
|
|
||||||
%{object: object}
|
|
||||||
)
|
|
||||||
|
|
||||||
{:ok, object, deleted_activity}
|
{:ok, object, deleted_activity}
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@spec cleanup_attachments(boolean(), %{required(:object) => map()}) ::
|
|
||||||
{:ok, Oban.Job.t() | nil}
|
|
||||||
def cleanup_attachments(true, %{object: _} = params) do
|
|
||||||
AttachmentsCleanupWorker.enqueue("cleanup_attachments", params)
|
|
||||||
end
|
|
||||||
|
|
||||||
def cleanup_attachments(_, _), do: {:ok, nil}
|
|
||||||
|
|
||||||
def prune(%Object{data: %{"id" => _id}} = object) do
|
def prune(%Object{data: %{"id" => _id}} = object) do
|
||||||
with {:ok, object} <- Repo.delete(object),
|
with {:ok, object} <- Repo.delete(object),
|
||||||
{:ok, _} <- invalid_object_cache(object) do
|
{:ok, _} <- invalid_object_cache(object) do
|
||||||
|
|
|
@ -5,44 +5,102 @@
|
||||||
defmodule Pleroma.Workers.AttachmentsCleanupWorker do
|
defmodule Pleroma.Workers.AttachmentsCleanupWorker do
|
||||||
import Ecto.Query
|
import Ecto.Query
|
||||||
|
|
||||||
|
alias Pleroma.Config
|
||||||
alias Pleroma.Object
|
alias Pleroma.Object
|
||||||
alias Pleroma.Repo
|
alias Pleroma.Repo
|
||||||
|
|
||||||
use Pleroma.Workers.WorkerHelper, queue: "attachments_cleanup"
|
use Pleroma.Workers.WorkerHelper, queue: "attachments_cleanup"
|
||||||
|
|
||||||
|
@doc """
|
||||||
|
Takes object data and if necessary enqueues a job,
|
||||||
|
deleting all attachments of the post eligible for cleanup
|
||||||
|
"""
|
||||||
|
@spec enqueue_if_needed(map()) :: {:ok, Oban.Job.t()} | {:ok, :skip} | {:error, any()}
|
||||||
|
def enqueue_if_needed(%{
|
||||||
|
"actor" => actor,
|
||||||
|
"attachment" => [_ | _] = attachments
|
||||||
|
}) do
|
||||||
|
with true <- Config.get([:instance, :cleanup_attachments]),
|
||||||
|
true <- URI.parse(actor).host == Pleroma.Web.Endpoint.host(),
|
||||||
|
attachments <- Enum.filter(attachments, &deletable_attachment/1),
|
||||||
|
[_ | _] <- attachments do
|
||||||
|
enqueue(
|
||||||
|
"cleanup_attachments",
|
||||||
|
%{"actor" => actor, "attachments" => attachments},
|
||||||
|
schedule_in: Config.get!([:instance, :cleanup_attachments_delay])
|
||||||
|
)
|
||||||
|
else
|
||||||
|
_ -> {:ok, :skip}
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def enqueue_if_needed(_), do: {:ok, :skip}
|
||||||
|
|
||||||
|
defp base_urls() do
|
||||||
|
Config.get([Pleroma.Upload, :all_base_urls]) ||
|
||||||
|
[Config.get!([Pleroma.Upload, :base_url])]
|
||||||
|
end
|
||||||
|
|
||||||
|
defp deletable_attachment(%{"id" => _id, "url" => [%{"href" => href} | _]}) do
|
||||||
|
# We can't delete files later if we can't strip the prefix
|
||||||
|
Enum.any?(base_urls(), fn url -> String.starts_with?(href, url) end)
|
||||||
|
end
|
||||||
|
|
||||||
|
defp deletable_attachment(_), do: false
|
||||||
|
|
||||||
@impl Oban.Worker
|
@impl Oban.Worker
|
||||||
def perform(%Job{
|
def perform(%Job{
|
||||||
args: %{
|
args: %{
|
||||||
"op" => "cleanup_attachments",
|
"op" => "cleanup_attachments",
|
||||||
"object" => %{"data" => %{"attachment" => [_ | _] = attachments, "actor" => actor}}
|
"attachments" => [_ | _] = attachments,
|
||||||
|
"actor" => actor
|
||||||
}
|
}
|
||||||
}) do
|
}) do
|
||||||
if Pleroma.Config.get([:instance, :cleanup_attachments], false) do
|
attachments
|
||||||
attachments
|
|> Enum.flat_map(fn item -> Enum.map(item["url"], & &1["href"]) end)
|
||||||
|> Enum.flat_map(fn item -> Enum.map(item["url"], & &1["href"]) end)
|
|> fetch_objects
|
||||||
|> fetch_objects
|
|> prepare_objects(actor, Enum.map(attachments, & &1["name"]))
|
||||||
|> prepare_objects(actor, Enum.map(attachments, & &1["name"]))
|
|> filter_objects
|
||||||
|> filter_objects
|
|> do_clean
|
||||||
|> do_clean
|
|
||||||
end
|
|
||||||
|
|
||||||
{:ok, :success}
|
{:ok, :success}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Left over already enqueued jobs in the old format
|
||||||
|
# This function clause can be deleted once sufficient time passed after 3.14
|
||||||
|
def perform(%Job{
|
||||||
|
args: %{
|
||||||
|
"op" => "cleanup_attachments",
|
||||||
|
"object" => %{"data" => data}
|
||||||
|
}
|
||||||
|
}) do
|
||||||
|
enqueue_if_needed(data)
|
||||||
|
end
|
||||||
|
|
||||||
def perform(%Job{args: %{"op" => "cleanup_attachments", "object" => _object}}), do: {:ok, :skip}
|
def perform(%Job{args: %{"op" => "cleanup_attachments", "object" => _object}}), do: {:ok, :skip}
|
||||||
|
|
||||||
|
defp trim_first_leading(string, []), do: string
|
||||||
|
|
||||||
|
defp trim_first_leading(string, [prefix | rest]) do
|
||||||
|
trimmed = String.trim_leading(string, prefix)
|
||||||
|
|
||||||
|
if trimmed != string do
|
||||||
|
trimmed
|
||||||
|
else
|
||||||
|
trim_first_leading(string, rest)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
defp do_clean({object_ids, attachment_urls}) do
|
defp do_clean({object_ids, attachment_urls}) do
|
||||||
uploader = Pleroma.Config.get([Pleroma.Upload, :uploader])
|
uploader = Pleroma.Config.get([Pleroma.Upload, :uploader])
|
||||||
|
|
||||||
base_url =
|
base_urls =
|
||||||
String.trim_trailing(
|
base_urls()
|
||||||
Pleroma.Upload.base_url(),
|
|> Enum.map(fn url -> String.trim_trailing(url, "/") end)
|
||||||
"/"
|
|
||||||
)
|
|
||||||
|
|
||||||
Enum.each(attachment_urls, fn href ->
|
Enum.each(attachment_urls, fn href ->
|
||||||
href
|
href
|
||||||
|> String.trim_leading("#{base_url}")
|
|> trim_first_leading(base_urls)
|
||||||
|> uploader.delete_file()
|
|> uploader.delete_file()
|
||||||
end)
|
end)
|
||||||
|
|
||||||
|
|
107
test/pleroma/workers/attachments_cleanup_worker_test.exs
Normal file
107
test/pleroma/workers/attachments_cleanup_worker_test.exs
Normal file
|
@ -0,0 +1,107 @@
|
||||||
|
# Akkoma: Magically expressive social media
|
||||||
|
# Copyright © 2024 Akkoma Authors <https://akkoma.dev/>
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
|
||||||
|
defmodule Pleroma.Workers.AttachmentsCleanupWorkerTest do
|
||||||
|
use Pleroma.DataCase, async: false
|
||||||
|
use Oban.Testing, repo: Pleroma.Repo
|
||||||
|
|
||||||
|
import Pleroma.Factory
|
||||||
|
|
||||||
|
alias Pleroma.Object
|
||||||
|
alias Pleroma.Workers.AttachmentsCleanupWorker
|
||||||
|
alias Pleroma.Tests.ObanHelpers
|
||||||
|
|
||||||
|
setup do
|
||||||
|
clear_config([:instance, :cleanup_attachments], true)
|
||||||
|
clear_config([Pleroma.Upload, :all_base_urls])
|
||||||
|
|
||||||
|
file = %Plug.Upload{
|
||||||
|
content_type: "image/jpeg",
|
||||||
|
path: Path.absname("test/fixtures/image.jpg"),
|
||||||
|
filename: "an_image.jpg"
|
||||||
|
}
|
||||||
|
|
||||||
|
user = insert(:user)
|
||||||
|
|
||||||
|
{:ok, %Pleroma.Object{} = attachment} =
|
||||||
|
Pleroma.Web.ActivityPub.ActivityPub.upload(file, actor: user.ap_id)
|
||||||
|
|
||||||
|
{:ok, attachment: attachment, user: user}
|
||||||
|
end
|
||||||
|
|
||||||
|
test "does not enqueue remote post" do
|
||||||
|
remote_data = %{
|
||||||
|
"id" => "https://remote.example/obj/123",
|
||||||
|
"actor" => "https://remote.example/user/1",
|
||||||
|
"content" => "content",
|
||||||
|
"attachment" => [
|
||||||
|
%{
|
||||||
|
"type" => "Document",
|
||||||
|
"mediaType" => "image/png",
|
||||||
|
"name" => "marvellous image",
|
||||||
|
"url" => "https://remote.example/files/image.png"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
assert {:ok, :skip} = AttachmentsCleanupWorker.enqueue_if_needed(remote_data)
|
||||||
|
end
|
||||||
|
|
||||||
|
test "enqueues local post", %{attachment: attachment, user: user} do
|
||||||
|
local_url = Pleroma.Web.Endpoint.url()
|
||||||
|
|
||||||
|
local_data = %{
|
||||||
|
"id" => local_url <> "/obj/123",
|
||||||
|
"actor" => user.ap_id,
|
||||||
|
"content" => "content",
|
||||||
|
"attachment" => [attachment.data]
|
||||||
|
}
|
||||||
|
|
||||||
|
assert {:ok, %Oban.Job{}} = AttachmentsCleanupWorker.enqueue_if_needed(local_data)
|
||||||
|
end
|
||||||
|
|
||||||
|
test "doesn't delete immediately", %{attachment: attachment, user: user} do
|
||||||
|
delay = 6000
|
||||||
|
clear_config([:instance, :cleanup_attachments_delay], delay)
|
||||||
|
|
||||||
|
note = insert(:note, %{user: user, data: %{"attachment" => [attachment.data]}})
|
||||||
|
|
||||||
|
uploads_dir = Pleroma.Config.get!([Pleroma.Uploaders.Local, :uploads])
|
||||||
|
%{"url" => [%{"href" => href}]} = attachment.data
|
||||||
|
path = "#{uploads_dir}/#{Path.basename(href)}"
|
||||||
|
|
||||||
|
assert File.exists?(path)
|
||||||
|
|
||||||
|
Object.delete(note)
|
||||||
|
Process.sleep(2000)
|
||||||
|
|
||||||
|
assert File.exists?(path)
|
||||||
|
|
||||||
|
ObanHelpers.perform(all_enqueued(worker: Pleroma.Workers.AttachmentsCleanupWorker))
|
||||||
|
|
||||||
|
assert Object.get_by_id(note.id).data["deleted"]
|
||||||
|
assert Object.get_by_id(attachment.id) == nil
|
||||||
|
refute File.exists?(path)
|
||||||
|
end
|
||||||
|
|
||||||
|
test "skips localpost with unmappable URLs", %{attachment: attachment, user: user} do
|
||||||
|
local_url = Pleroma.Web.Endpoint.url()
|
||||||
|
|
||||||
|
attach_data =
|
||||||
|
attachment.data
|
||||||
|
|> Map.update!("url", fn
|
||||||
|
[%{"href" => _} = url | _] ->
|
||||||
|
[%{url | "href" => "https://oldmedia.example/files/123.png"}]
|
||||||
|
end)
|
||||||
|
|
||||||
|
local_data = %{
|
||||||
|
"id" => local_url <> "/obj/123",
|
||||||
|
"actor" => user.ap_id,
|
||||||
|
"content" => "content",
|
||||||
|
"attachment" => [attach_data]
|
||||||
|
}
|
||||||
|
|
||||||
|
assert {:ok, :skip} = AttachmentsCleanupWorker.enqueue_if_needed(local_data)
|
||||||
|
end
|
||||||
|
end
|
Loading…
Reference in a new issue