Dedupe uploads

This commit is contained in:
Sir_Boops 2018-04-15 17:37:51 -06:00
parent b5d8213e70
commit 3f0440ac3c
No known key found for this signature in database
GPG key ID: 6DF2B9312201B66B
4 changed files with 117 additions and 49 deletions

View file

@ -8,7 +8,8 @@ config :pleroma, :instance,
name: "<%= name %>",
email: "<%= email %>",
limit: 5000,
registrations_open: true
registrations_open: true,
dedupe_media: true
config :pleroma, :media_proxy,
enabled: false,

View file

@ -2,20 +2,21 @@ defmodule Pleroma.Upload do
alias Ecto.UUID
alias Pleroma.Web
def store(%Plug.Upload{} = file) do
uuid = UUID.generate()
upload_folder = Path.join(upload_path(), uuid)
File.mkdir_p!(upload_folder)
result_file = Path.join(upload_folder, file.filename)
File.cp!(file.path, result_file)
def store(%Plug.Upload{} = file, should_dedupe) do
content_type = get_content_type(file.path)
uuid = get_uuid(file, should_dedupe)
name = get_name(file, uuid, content_type, should_dedupe)
upload_folder = get_upload_path(uuid, should_dedupe)
url_path = get_url(name, uuid, should_dedupe)
# fix content type on some image uploads
content_type =
if file.content_type in [nil, "application/octet-stream"] do
get_content_type(file.path)
else
file.content_type
end
File.mkdir_p!(upload_folder)
result_file = Path.join(upload_folder, name)
if File.exists?(result_file) do
File.rm!(file.path)
else
File.cp!(file.path, result_file)
end
%{
"type" => "Image",
@ -23,26 +24,48 @@ def store(%Plug.Upload{} = file) do
%{
"type" => "Link",
"mediaType" => content_type,
"href" => url_for(Path.join(uuid, :cow_uri.urlencode(file.filename)))
"href" => url_path
}
],
"name" => file.filename,
"uuid" => uuid
"name" => name
}
end
def store(%{"img" => "data:image/" <> image_data}) do
def store(%{"img" => "data:image/" <> image_data}, should_dedupe) do
parsed = Regex.named_captures(~r/(?<filetype>jpeg|png|gif);base64,(?<data>.*)/, image_data)
data = Base.decode64!(parsed["data"])
data = Base.decode64!(parsed["data"], ignore: :whitespace)
uuid = UUID.generate()
upload_folder = Path.join(upload_path(), uuid)
uuidpath = Path.join(upload_path(), uuid)
uuid = UUID.generate()
File.mkdir_p!(upload_path())
File.write!(uuidpath, data)
content_type = get_content_type(uuidpath)
name =
create_name(
String.downcase(Base.encode16(:crypto.hash(:sha256, data))),
parsed["filetype"],
content_type
)
upload_folder = get_upload_path(uuid, should_dedupe)
url_path = get_url(name, uuid, should_dedupe)
File.mkdir_p!(upload_folder)
filename = Base.encode16(:crypto.hash(:sha256, data)) <> ".#{parsed["filetype"]}"
result_file = Path.join(upload_folder, filename)
result_file = Path.join(upload_folder, name)
File.write!(result_file, data)
content_type = "image/#{parsed["filetype"]}"
if should_dedupe do
if !File.exists?(result_file) do
File.rename(uuidpath, result_file)
else
File.rm!(uuidpath)
end
else
File.rename(uuidpath, result_file)
end
%{
"type" => "Image",
@ -50,11 +73,10 @@ def store(%{"img" => "data:image/" <> image_data}) do
%{
"type" => "Link",
"mediaType" => content_type,
"href" => url_for(Path.join(uuid, :cow_uri.urlencode(filename)))
"href" => url_path
}
],
"name" => filename,
"uuid" => uuid
"name" => name
}
end
@ -63,6 +85,46 @@ def upload_path do
Keyword.fetch!(settings, :uploads)
end
# Builds a lower-cased "<uuid>.<suffix>" file name.
#
# The suffix is `ext` when `type` is exactly "application/octet-stream";
# for any other type it is the text after the last "/" of `type`.
defp create_name(uuid, ext, type) do
  suffix =
    case type do
      "application/octet-stream" -> ext
      _other -> type |> String.split("/") |> List.last()
    end

  [uuid, suffix]
  |> Enum.join(".")
  |> String.downcase()
end
# Returns the identifier used for an upload.
#
# When `should_dedupe` is truthy the identifier is the upper-case hex SHA-256
# digest of the contents of `file.path` (the whole file is read into memory);
# otherwise a fresh value from UUID.generate/0 is returned.
defp get_uuid(file, should_dedupe) do
  cond do
    should_dedupe ->
      contents = File.read!(file.path)
      digest = :crypto.hash(:sha256, contents)
      Base.encode16(digest)

    true ->
      UUID.generate()
  end
end
# Chooses the file name under which an upload is stored and reported.
#
# `file.filename` is supplied by the client and therefore untrusted. It is
# reduced to its final path component before use, so that a name containing
# directory separators or "../" segments cannot resolve outside the folder
# it is later joined onto.
#
#   * `should_dedupe` truthy - the name is built by create_name/3 from `uuid`,
#     the text after the last "." of the filename, and `type`. A filename
#     with no "." contributes the whole filename as the extension candidate.
#   * otherwise - the sanitised filename itself.
defp get_name(file, uuid, type, should_dedupe) do
  filename = Path.basename(file.filename)

  if should_dedupe do
    extension = filename |> String.split(".") |> List.last()
    create_name(uuid, extension, type)
  else
    filename
  end
end
# Returns the directory an upload is written to: upload_path/0 itself when
# `should_dedupe` is truthy, otherwise a `uuid` sub-directory of it.
defp get_upload_path(uuid, should_dedupe) do
  base_dir = upload_path()

  if should_dedupe, do: base_dir, else: Path.join(base_dir, uuid)
end
# Builds the URL for a stored upload via url_for/1.
#
# `name` is URL-encoded with :cow_uri.urlencode/1; when `should_dedupe` is
# falsy the encoded name is additionally placed under a `uuid` path segment.
defp get_url(name, uuid, should_dedupe) do
  encoded_name = :cow_uri.urlencode(name)

  relative =
    if should_dedupe do
      encoded_name
    else
      Path.join(uuid, encoded_name)
    end

  url_for(relative)
end
# Prefixes `file` with the value of Web.base_url/0 followed by "/media/".
defp url_for(file) do
  [Web.base_url(), "/media/", file]
  |> Enum.join()
end

View file

@ -492,7 +492,7 @@ def fetch_activities(recipients, opts \\ %{}) do
end
def upload(file) do
data = Upload.store(file)
data = Upload.store(file, Application.get_env(:pleroma, :instance)[:dedupe_media])
Repo.insert(%Object{data: data})
end

View file

@ -3,40 +3,45 @@ defmodule Pleroma.UploadTest do
use Pleroma.DataCase
describe "Storing a file" do
test "copies the file to the configured folder" do
test "copies the file to the configured folder with deduping" do
File.cp!("test/fixtures/image.jpg", "test/fixtures/image_tmp.jpg")
file = %Plug.Upload{
content_type: "image/jpg",
path: Path.absname("test/fixtures/image.jpg"),
path: Path.absname("test/fixtures/image_tmp.jpg"),
filename: "an [image.jpg"
}
data = Upload.store(file)
assert data["name"] == "an [image.jpg"
data = Upload.store(file, true)
assert List.first(data["url"])["href"] ==
"http://localhost:4001/media/#{data["uuid"]}/an%20%5Bimage.jpg"
assert data["name"] ==
"e7a6d0cf595bff76f14c9a98b6c199539559e8b844e02e51e5efcfd1f614a2df.jpeg"
end
test "fixes an incorrect content type" do
test "copies the file to the configured folder without deduping" do
File.cp!("test/fixtures/image.jpg", "test/fixtures/image_tmp.jpg")
file = %Plug.Upload{
content_type: "image/jpg",
path: Path.absname("test/fixtures/image_tmp.jpg"),
filename: "an [image.jpg"
}
data = Upload.store(file, false)
assert data["name"] == "an [image.jpg"
end
test "fixes incorrect content type" do
File.cp!("test/fixtures/image.jpg", "test/fixtures/image_tmp.jpg")
file = %Plug.Upload{
content_type: "application/octet-stream",
path: Path.absname("test/fixtures/image.jpg"),
path: Path.absname("test/fixtures/image_tmp.jpg"),
filename: "an [image.jpg"
}
data = Upload.store(file)
data = Upload.store(file, true)
assert hd(data["url"])["mediaType"] == "image/jpeg"
end
test "does not modify a valid content type" do
file = %Plug.Upload{
content_type: "image/png",
path: Path.absname("test/fixtures/image.jpg"),
filename: "an [image.jpg"
}
data = Upload.store(file)
assert hd(data["url"])["mediaType"] == "image/png"
end
end
end