Delete attachments asynchronously

This commit is contained in:
Roman Chvanikov 2020-01-19 19:45:20 +03:00
parent 81133702d4
commit d6a532bf0f
3 changed files with 99 additions and 73 deletions

View file

@ -19,6 +19,8 @@ defmodule Pleroma.Object do
@type t() :: %__MODULE__{}
@derive {Jason.Encoder, only: [:data]}
schema "objects" do
field(:data, :map)
@ -183,83 +185,14 @@ def delete(%Object{data: %{"id" => id}} = object) do
deleted_activity = Activity.delete_all_by_object_ap_id(id),
{:ok, true} <- Cachex.del(:object_cache, "object:#{id}"),
{:ok, _} <- Cachex.del(:web_resp_cache, URI.parse(id).path),
:ok <- delete_attachments(object) do
{:ok, _} <-
Pleroma.Workers.AttachmentsCleanupWorker.enqueue("cleanup_attachments", %{
"object" => object
}) do
{:ok, object, deleted_activity}
end
end
defp delete_attachments(%{data: %{"attachment" => [_ | _] = attachments, "actor" => actor}}) do
hrefs =
Enum.flat_map(attachments, fn attachment ->
Enum.map(attachment["url"], & &1["href"])
end)
names = Enum.map(attachments, & &1["name"])
uploader = Pleroma.Config.get([Pleroma.Upload, :uploader])
# find all objects for copies of the attachments, name and actor doesn't matter here
delete_ids =
from(o in Object,
where:
fragment(
"to_jsonb(array(select jsonb_array_elements((?)#>'{url}') ->> 'href' where jsonb_typeof((?)#>'{url}') = 'array'))::jsonb \\?| (?)",
o.data,
o.data,
^hrefs
)
)
|> Repo.all()
# we should delete 1 object for any given attachment, but don't delete files if
# there are more than 1 object for it
|> Enum.reduce(%{}, fn %{
id: id,
data: %{
"url" => [%{"href" => href}],
"actor" => obj_actor,
"name" => name
}
},
acc ->
Map.update(acc, href, %{id: id, count: 1}, fn val ->
case obj_actor == actor and name in names do
true ->
# set id of the actor's object that will be deleted
%{val | id: id, count: val.count + 1}
false ->
# another actor's object, just increase count to not delete file
%{val | count: val.count + 1}
end
end)
end)
|> Enum.map(fn {href, %{id: id, count: count}} ->
# only delete files that have single instance
with 1 <- count do
prefix =
case Pleroma.Config.get([Pleroma.Upload, :base_url]) do
nil -> "media"
_ -> ""
end
base_url = Pleroma.Config.get([__MODULE__, :base_url], Pleroma.Web.base_url())
file_path = String.trim_leading(href, "#{base_url}/#{prefix}")
uploader.delete_file(file_path)
end
id
end)
from(o in Object, where: o.id in ^delete_ids)
|> Repo.delete_all()
:ok
end
defp delete_attachments(%{data: _data}), do: :ok
def prune(%Object{data: %{"id" => id}} = object) do
with {:ok, object} <- Repo.delete(object),
{:ok, true} <- Cachex.del(:object_cache, "object:#{id}"),

View file

@ -0,0 +1,87 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Workers.AttachmentsCleanupWorker do
import Ecto.Query
alias Pleroma.Object
alias Pleroma.Repo
use Pleroma.Workers.WorkerHelper, queue: "attachments_cleanup"
@impl Oban.Worker
def perform(
%{"object" => %{"data" => %{"attachment" => [_ | _] = attachments, "actor" => actor}}},
_job
) do
hrefs =
Enum.flat_map(attachments, fn attachment ->
Enum.map(attachment["url"], & &1["href"])
end)
names = Enum.map(attachments, & &1["name"])
uploader = Pleroma.Config.get([Pleroma.Upload, :uploader])
# find all objects for copies of the attachments, name and actor doesn't matter here
delete_ids =
from(o in Object,
where:
fragment(
"to_jsonb(array(select jsonb_array_elements((?)#>'{url}') ->> 'href' where jsonb_typeof((?)#>'{url}') = 'array'))::jsonb \\?| (?)",
o.data,
o.data,
^hrefs
)
)
# The query above can be time consumptive on large instances until we refactor how uploads are stored
|> Repo.all(timout: :infinity)
# we should delete 1 object for any given attachment, but don't delete files if
# there are more than 1 object for it
|> Enum.reduce(%{}, fn %{
id: id,
data: %{
"url" => [%{"href" => href}],
"actor" => obj_actor,
"name" => name
}
},
acc ->
Map.update(acc, href, %{id: id, count: 1}, fn val ->
case obj_actor == actor and name in names do
true ->
# set id of the actor's object that will be deleted
%{val | id: id, count: val.count + 1}
false ->
# another actor's object, just increase count to not delete file
%{val | count: val.count + 1}
end
end)
end)
|> Enum.map(fn {href, %{id: id, count: count}} ->
# only delete files that have single instance
with 1 <- count do
prefix =
case Pleroma.Config.get([Pleroma.Upload, :base_url]) do
nil -> "media"
_ -> ""
end
base_url = Pleroma.Config.get([__MODULE__, :base_url], Pleroma.Web.base_url())
file_path = String.trim_leading(href, "#{base_url}/#{prefix}")
uploader.delete_file(file_path)
end
id
end)
from(o in Object, where: o.id in ^delete_ids)
|> Repo.delete_all()
end
def perform(%{"object" => _object}, _job), do: :ok
end

View file

@ -4,12 +4,14 @@
defmodule Pleroma.ObjectTest do
use Pleroma.DataCase
use Oban.Testing, repo: Pleroma.Repo
import ExUnit.CaptureLog
import Pleroma.Factory
import Tesla.Mock
alias Pleroma.Activity
alias Pleroma.Object
alias Pleroma.Repo
alias Pleroma.Tests.ObanHelpers
alias Pleroma.Web.CommonAPI
setup do
@ -99,6 +101,8 @@ test "in subdirectories" do
Object.delete(note)
ObanHelpers.perform(all_enqueued(worker: Pleroma.Workers.AttachmentsCleanupWorker))
assert Object.get_by_id(attachment.id) == nil
assert {:ok, []} == File.ls("#{uploads_dir}/#{path}")
@ -133,6 +137,8 @@ test "with dedupe enabled" do
Object.delete(note)
ObanHelpers.perform(all_enqueued(worker: Pleroma.Workers.AttachmentsCleanupWorker))
assert Object.get_by_id(attachment.id) == nil
assert {:ok, files} = File.ls(uploads_dir)
refute filename in files