Fix various attachment cleanup issues #789

Open
Oneric wants to merge 2 commits from Oneric/akkoma:attachcleanup-overeager into develop
6 changed files with 135 additions and 23 deletions

View file

@ -15,6 +15,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- Meilisearch: it is now possible to use separate keys for search and admin actions
- New standalone `prune_orphaned_activities` mix task with configurable batch limit
- The `prune_objects` mix task now accepts a `--limit` parameter for initial object pruning
- New config option `:instance, :cleanup_attachments_delay`
## Fixed
- Meilisearch: order of results returned from our REST API now actually matches how Meilisearch ranks results
@ -23,6 +24,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- AP objects with additional JSON-LD profiles beyond ActivityStreams can now be fetched
- Single-selection polls no longer expose the voter_count; MastoAPI demands it be null
and this confused some clients leading to vote distributions >100%
- Fix “Delete & Redraft” often losing attachments if attachment cleanup was enabled
## Changed
- Refactored Rich Media to cache the content in the database. Fetching operations that could block status rendering have been eliminated.

View file

@ -255,6 +255,7 @@
external_user_synchronization: true,
extended_nickname_format: true,
cleanup_attachments: false,
cleanup_attachments_delay: 1800,
multi_factor_authentication: [
totp: [
# digits 6 or 8

View file

@ -58,6 +58,7 @@ To add configuration to your config file, you can copy it from the base config.
* `registration_reason_length`: Maximum registration reason length (default: `500`).
* `external_user_synchronization`: Enabling following/followers counters synchronization for external users.
* `cleanup_attachments`: Remove attachments along with statuses. Does not affect duplicate files and attachments without status. Enabling this will increase load to database when deleting statuses on larger instances.
* `cleanup_attachments_delay`: How many seconds to wait after post deletion before attempting to deletion; useful for “delete & redraft” functionality (default: `1800`)
* `show_reactions`: Let favourites and emoji reactions be viewed through the API (default: `true`).
* `password_reset_token_validity`: The time after which reset tokens aren't accepted anymore, in seconds (default: one day).
* `local_bubble`: Array of domains representing instances closely related to yours. Used to populate the `bubble` timeline. e.g `["example.com"]`, (default: `[]`)

View file

@ -9,7 +9,6 @@ defmodule Pleroma.Object do
import Ecto.Changeset
alias Pleroma.Activity
alias Pleroma.Config
alias Pleroma.Hashtag
alias Pleroma.Object
alias Pleroma.Object.Fetcher
@ -241,23 +240,11 @@ def delete(%Object{data: %{"id" => id}} = object) do
with {:ok, _obj} = swap_object_with_tombstone(object),
deleted_activity = Activity.delete_all_by_object_ap_id(id),
{:ok, _} <- invalid_object_cache(object) do
cleanup_attachments(
Config.get([:instance, :cleanup_attachments]),
%{object: object}
)
AttachmentsCleanupWorker.enqueue_if_needed(object.data)
{:ok, object, deleted_activity}
end
end
@spec cleanup_attachments(boolean(), %{required(:object) => map()}) ::
{:ok, Oban.Job.t() | nil}
def cleanup_attachments(true, %{object: _} = params) do
AttachmentsCleanupWorker.enqueue("cleanup_attachments", params)
end
def cleanup_attachments(_, _), do: {:ok, nil}
def prune(%Object{data: %{"id" => _id}} = object) do
with {:ok, object} <- Repo.delete(object),
{:ok, _} <- invalid_object_cache(object) do

View file

@ -5,30 +5,65 @@
defmodule Pleroma.Workers.AttachmentsCleanupWorker do
import Ecto.Query
alias Pleroma.Config
alias Pleroma.Object
alias Pleroma.Repo
use Pleroma.Workers.WorkerHelper, queue: "attachments_cleanup"
@doc """
Takes object data and if necessary enqueues a job,
deleting all attachments of the post eligible for cleanup
"""
@spec enqueue_if_needed(map()) :: {:ok, Oban.Job.t()} | {:ok, :skip} | {:error, any()}
def enqueue_if_needed(%{
"actor" => actor,
"attachment" => [_ | _] = attachments
}) do
with true <- Config.get([:instance, :cleanup_attachments]),
true <- URI.parse(actor).host == Pleroma.Web.Endpoint.host(),
[_ | _] <- attachments do
enqueue(
"cleanup_attachments",
%{"actor" => actor, "attachments" => attachments},
schedule_in: Config.get!([:instance, :cleanup_attachments_delay])
)
else
_ -> {:ok, :skip}
end
end
def enqueue_if_needed(_), do: {:ok, :skip}
@impl Oban.Worker
def perform(%Job{
args: %{
"op" => "cleanup_attachments",
"object" => %{"data" => %{"attachment" => [_ | _] = attachments, "actor" => actor}}
"attachments" => [_ | _] = attachments,
"actor" => actor
}
}) do
if Pleroma.Config.get([:instance, :cleanup_attachments], false) do
attachments
|> Enum.flat_map(fn item -> Enum.map(item["url"], & &1["href"]) end)
|> fetch_objects
|> prepare_objects(actor, Enum.map(attachments, & &1["name"]))
|> filter_objects
|> do_clean
end
attachments
|> Enum.flat_map(fn item -> Enum.map(item["url"], & &1["href"]) end)
|> fetch_objects
|> prepare_objects(actor, Enum.map(attachments, & &1["name"]))
|> filter_objects
|> do_clean
{:ok, :success}
end
# Left over already enqueued jobs in the old format
# This function clause can be deleted once sufficient time passed after 3.14
def perform(%Job{
args: %{
"op" => "cleanup_attachments",
"object" => %{"data" => data}
}
}) do
enqueue_if_needed(data)
end
def perform(%Job{args: %{"op" => "cleanup_attachments", "object" => _object}}), do: {:ok, :skip}
defp do_clean({object_ids, attachment_urls}) do

View file

@ -0,0 +1,86 @@
# Akkoma: Magically expressive social media
# Copyright © 2024 Akkoma Authors <https://akkoma.dev/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Workers.AttachmentsCleanupWorkerTest do
use Pleroma.DataCase, async: false
use Oban.Testing, repo: Pleroma.Repo
import Pleroma.Factory
alias Pleroma.Object
alias Pleroma.Workers.AttachmentsCleanupWorker
alias Pleroma.Tests.ObanHelpers
setup do
clear_config([:instance, :cleanup_attachments], true)
file = %Plug.Upload{
content_type: "image/jpeg",
path: Path.absname("test/fixtures/image.jpg"),
filename: "an_image.jpg"
}
user = insert(:user)
{:ok, %Pleroma.Object{} = attachment} =
Pleroma.Web.ActivityPub.ActivityPub.upload(file, actor: user.ap_id)
{:ok, attachment: attachment, user: user}
end
test "does not enqueue remote post" do
remote_data = %{
"id" => "https://remote.example/obj/123",
"actor" => "https://remote.example/user/1",
"content" => "content",
"attachment" => [
%{
"type" => "Document",
"mediaType" => "image/png",
"name" => "marvellous image",
"url" => "https://remote.example/files/image.png"
}
]
}
assert {:ok, :skip} = AttachmentsCleanupWorker.enqueue_if_needed(remote_data)
end
test "enqueues local post", %{attachment: attachment, user: user} do
local_url = Pleroma.Web.Endpoint.url()
local_data = %{
"id" => local_url <> "/obj/123",
"actor" => user.ap_id,
"content" => "content",
"attachment" => [attachment.data]
}
assert {:ok, %Oban.Job{}} = AttachmentsCleanupWorker.enqueue_if_needed(local_data)
end
test "doesn't delete immediately", %{attachment: attachment, user: user} do
delay = 6000
clear_config([:instance, :cleanup_attachments_delay], delay)
note = insert(:note, %{user: user, data: %{"attachment" => [attachment.data]}})
uploads_dir = Pleroma.Config.get!([Pleroma.Uploaders.Local, :uploads])
%{"url" => [%{"href" => href}]} = attachment.data
path = "#{uploads_dir}/#{Path.basename(href)}"
assert File.exists?(path)
Object.delete(note)
Process.sleep(2000)
assert File.exists?(path)
ObanHelpers.perform(all_enqueued(worker: Pleroma.Workers.AttachmentsCleanupWorker))
assert Object.get_by_id(note.id).data["deleted"]
assert Object.get_by_id(attachment.id) == nil
refute File.exists?(path)
end
end