Delete attachments asynchronously

This commit is contained in:
Roman Chvanikov 2020-01-19 19:45:20 +03:00
parent 81133702d4
commit d6a532bf0f
3 changed files with 99 additions and 73 deletions

View file

@ -19,6 +19,8 @@ defmodule Pleroma.Object do
@type t() :: %__MODULE__{} @type t() :: %__MODULE__{}
@derive {Jason.Encoder, only: [:data]}
schema "objects" do schema "objects" do
field(:data, :map) field(:data, :map)
@ -183,83 +185,14 @@ def delete(%Object{data: %{"id" => id}} = object) do
deleted_activity = Activity.delete_all_by_object_ap_id(id), deleted_activity = Activity.delete_all_by_object_ap_id(id),
{:ok, true} <- Cachex.del(:object_cache, "object:#{id}"), {:ok, true} <- Cachex.del(:object_cache, "object:#{id}"),
{:ok, _} <- Cachex.del(:web_resp_cache, URI.parse(id).path), {:ok, _} <- Cachex.del(:web_resp_cache, URI.parse(id).path),
:ok <- delete_attachments(object) do {:ok, _} <-
Pleroma.Workers.AttachmentsCleanupWorker.enqueue("cleanup_attachments", %{
"object" => object
}) do
{:ok, object, deleted_activity} {:ok, object, deleted_activity}
end end
end end
defp delete_attachments(%{data: %{"attachment" => [_ | _] = attachments, "actor" => actor}}) do
hrefs =
Enum.flat_map(attachments, fn attachment ->
Enum.map(attachment["url"], & &1["href"])
end)
names = Enum.map(attachments, & &1["name"])
uploader = Pleroma.Config.get([Pleroma.Upload, :uploader])
# find all objects for copies of the attachments, name and actor doesn't matter here
delete_ids =
from(o in Object,
where:
fragment(
"to_jsonb(array(select jsonb_array_elements((?)#>'{url}') ->> 'href' where jsonb_typeof((?)#>'{url}') = 'array'))::jsonb \\?| (?)",
o.data,
o.data,
^hrefs
)
)
|> Repo.all()
# we should delete 1 object for any given attachment, but don't delete files if
# there are more than 1 object for it
|> Enum.reduce(%{}, fn %{
id: id,
data: %{
"url" => [%{"href" => href}],
"actor" => obj_actor,
"name" => name
}
},
acc ->
Map.update(acc, href, %{id: id, count: 1}, fn val ->
case obj_actor == actor and name in names do
true ->
# set id of the actor's object that will be deleted
%{val | id: id, count: val.count + 1}
false ->
# another actor's object, just increase count to not delete file
%{val | count: val.count + 1}
end
end)
end)
|> Enum.map(fn {href, %{id: id, count: count}} ->
# only delete files that have single instance
with 1 <- count do
prefix =
case Pleroma.Config.get([Pleroma.Upload, :base_url]) do
nil -> "media"
_ -> ""
end
base_url = Pleroma.Config.get([__MODULE__, :base_url], Pleroma.Web.base_url())
file_path = String.trim_leading(href, "#{base_url}/#{prefix}")
uploader.delete_file(file_path)
end
id
end)
from(o in Object, where: o.id in ^delete_ids)
|> Repo.delete_all()
:ok
end
defp delete_attachments(%{data: _data}), do: :ok
def prune(%Object{data: %{"id" => id}} = object) do def prune(%Object{data: %{"id" => id}} = object) do
with {:ok, object} <- Repo.delete(object), with {:ok, object} <- Repo.delete(object),
{:ok, true} <- Cachex.del(:object_cache, "object:#{id}"), {:ok, true} <- Cachex.del(:object_cache, "object:#{id}"),

View file

@ -0,0 +1,87 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Workers.AttachmentsCleanupWorker do
import Ecto.Query
alias Pleroma.Object
alias Pleroma.Repo
use Pleroma.Workers.WorkerHelper, queue: "attachments_cleanup"
@impl Oban.Worker
def perform(
%{"object" => %{"data" => %{"attachment" => [_ | _] = attachments, "actor" => actor}}},
_job
) do
hrefs =
Enum.flat_map(attachments, fn attachment ->
Enum.map(attachment["url"], & &1["href"])
end)
names = Enum.map(attachments, & &1["name"])
uploader = Pleroma.Config.get([Pleroma.Upload, :uploader])
# find all objects for copies of the attachments, name and actor doesn't matter here
delete_ids =
from(o in Object,
where:
fragment(
"to_jsonb(array(select jsonb_array_elements((?)#>'{url}') ->> 'href' where jsonb_typeof((?)#>'{url}') = 'array'))::jsonb \\?| (?)",
o.data,
o.data,
^hrefs
)
)
# The query above can be time consumptive on large instances until we refactor how uploads are stored
|> Repo.all(timout: :infinity)
# we should delete 1 object for any given attachment, but don't delete files if
# there are more than 1 object for it
|> Enum.reduce(%{}, fn %{
id: id,
data: %{
"url" => [%{"href" => href}],
"actor" => obj_actor,
"name" => name
}
},
acc ->
Map.update(acc, href, %{id: id, count: 1}, fn val ->
case obj_actor == actor and name in names do
true ->
# set id of the actor's object that will be deleted
%{val | id: id, count: val.count + 1}
false ->
# another actor's object, just increase count to not delete file
%{val | count: val.count + 1}
end
end)
end)
|> Enum.map(fn {href, %{id: id, count: count}} ->
# only delete files that have single instance
with 1 <- count do
prefix =
case Pleroma.Config.get([Pleroma.Upload, :base_url]) do
nil -> "media"
_ -> ""
end
base_url = Pleroma.Config.get([__MODULE__, :base_url], Pleroma.Web.base_url())
file_path = String.trim_leading(href, "#{base_url}/#{prefix}")
uploader.delete_file(file_path)
end
id
end)
from(o in Object, where: o.id in ^delete_ids)
|> Repo.delete_all()
end
def perform(%{"object" => _object}, _job), do: :ok
end

View file

@ -4,12 +4,14 @@
defmodule Pleroma.ObjectTest do defmodule Pleroma.ObjectTest do
use Pleroma.DataCase use Pleroma.DataCase
use Oban.Testing, repo: Pleroma.Repo
import ExUnit.CaptureLog import ExUnit.CaptureLog
import Pleroma.Factory import Pleroma.Factory
import Tesla.Mock import Tesla.Mock
alias Pleroma.Activity alias Pleroma.Activity
alias Pleroma.Object alias Pleroma.Object
alias Pleroma.Repo alias Pleroma.Repo
alias Pleroma.Tests.ObanHelpers
alias Pleroma.Web.CommonAPI alias Pleroma.Web.CommonAPI
setup do setup do
@ -99,6 +101,8 @@ test "in subdirectories" do
Object.delete(note) Object.delete(note)
ObanHelpers.perform(all_enqueued(worker: Pleroma.Workers.AttachmentsCleanupWorker))
assert Object.get_by_id(attachment.id) == nil assert Object.get_by_id(attachment.id) == nil
assert {:ok, []} == File.ls("#{uploads_dir}/#{path}") assert {:ok, []} == File.ls("#{uploads_dir}/#{path}")
@ -133,6 +137,8 @@ test "with dedupe enabled" do
Object.delete(note) Object.delete(note)
ObanHelpers.perform(all_enqueued(worker: Pleroma.Workers.AttachmentsCleanupWorker))
assert Object.get_by_id(attachment.id) == nil assert Object.get_by_id(attachment.id) == nil
assert {:ok, files} = File.ls(uploads_dir) assert {:ok, files} = File.ls(uploads_dir)
refute filename in files refute filename in files