Merge pull request 'Read image description from EXIF data' (#744) from timorl/akkoma:elseinspe into develop

Reviewed-on: AkkomaGang/akkoma#744
This commit is contained in:
floatingghost 2024-04-25 12:52:31 +00:00
commit b1c6621e66
22 changed files with 433 additions and 78 deletions

View file

@ -9,9 +9,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## Added
- Support for [FEP-fffd](https://codeberg.org/fediverse/fep/src/branch/main/fep/fffd/fep-fffd.md) (proxy objects)
- Verified support for elixir 1.16
- Uploadfilter `Pleroma.Upload.Filter.Exiftool.ReadDescription` returns description values to the FE so they can pre fill the image description field
## Changed
- Inbound pipeline error handing was modified somewhat, which should lead to less incomprehensible log spam. Hopefully.
- Uploadfilter `Pleroma.Upload.Filter.Exiftool` has been renamed to `Pleroma.Upload.Filter.Exiftool.StripMetadata`
## Fixed
- Issue preventing fetching anything from IPv6-only instances

View file

@ -37,7 +37,8 @@ If any of the options are left unspecified, you will be prompted interactively.
- `--static-dir <path>` - the directory custom public files should be read from (custom emojis, frontend bundle overrides, robots.txt, etc.)
- `--listen-ip <ip>` - the ip the app should listen to, defaults to 127.0.0.1
- `--listen-port <port>` - the port the app should listen to, defaults to 4000
- `--strip-uploads <Y|N>` - use ExifTool to strip uploads of sensitive location data
- `--strip-uploads-metadata <Y|N>` - use ExifTool to strip uploads of sensitive metadata
- `--read-uploads-description <Y|N>` - use ExifTool to read image descriptions from uploads
- `--anonymize-uploads <Y|N>` - randomize uploaded filenames
- `--dedupe-uploads <Y|N>` - store files based on their hash to reduce data storage requirements if duplicates are uploaded with different filenames
- `--skip-release-env` - skip generation the release environment file

View file

@ -654,12 +654,19 @@ This filter replaces the declared filename (not the path) of an upload.
* `text`: Text to replace filenames in links. If empty, `{random}.extension` will be used. You can get the original filename extension by using `{extension}`, for example `custom-file-name.{extension}`.
#### Pleroma.Upload.Filter.Exiftool
#### Pleroma.Upload.Filter.Exiftool.StripMetadata
This filter only strips the GPS and location metadata with Exiftool leaving color profiles and attributes intact.
No specific configuration.
#### Pleroma.Upload.Filter.Exiftool.ReadDescription
This filter reads the ImageDescription and iptc:Caption-Abstract fields with Exiftool so clients can prefill the media description field.
No specific configuration.
#### Pleroma.Upload.Filter.OnlyMedia
This filter rejects uploads that are not identified with Content-Type matching audio/\*, image/\*, or video/\*

View file

@ -29,4 +29,5 @@ It is required for the following Akkoma features:
`exiftool` is media files metadata reader/writer.
It is required for the following Akkoma features:
* `Pleroma.Upload.Filters.Exiftool` upload filter (related config: `Plaroma.Upload/filters` in `config/config.exs`)
* `Pleroma.Upload.Filters.Exiftool.StripMetadata` upload filter (related config: `Plaroma.Upload/filters` in `config/config.exs`)
* `Pleroma.Upload.Filters.Exiftool.ReadDescription` upload filter (related config: `Plaroma.Upload/filters` in `config/config.exs`)

View file

@ -35,7 +35,8 @@ def run(["gen" | rest]) do
static_dir: :string,
listen_ip: :string,
listen_port: :string,
strip_uploads: :string,
strip_uploads_metadata: :string,
read_uploads_description: :string,
anonymize_uploads: :string
],
aliases: [
@ -169,7 +170,7 @@ def run(["gen" | rest]) do
)
|> Path.expand()
{strip_uploads_message, strip_uploads_default} =
{strip_uploads_metadata_message, strip_uploads_metadata_default} =
if Pleroma.Utils.command_available?("exiftool") do
{"Do you want to strip location (GPS) data from uploaded images? This requires exiftool, it was detected as installed. (y/n)",
"y"}
@ -178,12 +179,29 @@ def run(["gen" | rest]) do
"n"}
end
strip_uploads =
strip_uploads_metadata =
get_option(
options,
:strip_uploads,
strip_uploads_message,
strip_uploads_default
:strip_uploads_metadata,
strip_uploads_metadata_message,
strip_uploads_metadata_default
) === "y"
{read_uploads_description_message, read_uploads_description_default} =
if Pleroma.Utils.command_available?("exiftool") do
{"Do you want to read data from uploaded files so clients can use it to prefill fields like image description? This requires exiftool, it was detected as installed. (y/n)",
"y"}
else
{"Do you want to read data from uploaded files so clients can use it to prefill fields like image description? This requires exiftool, it was detected as not installed, please install it if you answer yes. (y/n)",
"n"}
end
read_uploads_description =
get_option(
options,
:read_uploads_description,
read_uploads_description_message,
read_uploads_description_default
) === "y"
anonymize_uploads =
@ -230,7 +248,8 @@ def run(["gen" | rest]) do
listen_port: listen_port,
upload_filters:
upload_filters(%{
strip: strip_uploads,
strip_metadata: strip_uploads_metadata,
read_description: read_uploads_description,
anonymize: anonymize_uploads
})
)
@ -305,11 +324,20 @@ defp write_robots_txt(static_dir, indexable, template_dir) do
end
defp upload_filters(filters) when is_map(filters) do
enabled_filters = []
enabled_filters =
if filters.strip do
[Pleroma.Upload.Filter.Exiftool]
if filters.read_description do
enabled_filters ++ [Pleroma.Upload.Filter.Exiftool.ReadDescription]
else
[]
enabled_filters
end
enabled_filters =
if filters.strip_metadata do
enabled_filters ++ [Pleroma.Upload.Filter.Exiftool.StripMetadata]
else
enabled_filters
end
enabled_filters =

View file

@ -164,7 +164,8 @@ defp do_check_rum!(setting, migrate) do
defp check_system_commands!(:ok) do
filter_commands_statuses = [
check_filter(Pleroma.Upload.Filter.Exiftool, "exiftool"),
check_filter(Pleroma.Upload.Filter.Exiftool.StripMetadata, "exiftool"),
check_filter(Pleroma.Upload.Filter.Exiftool.ReadDescription, "exiftool"),
check_filter(Pleroma.Upload.Filter.Mogrify, "mogrify"),
check_filter(Pleroma.Upload.Filter.Mogrifun, "mogrify"),
check_filter(Pleroma.Upload.Filter.AnalyzeMetadata, "mogrify"),

View file

@ -22,6 +22,43 @@ defmodule Pleroma.Config.DeprecationWarnings do
"\n* `config :pleroma, :instance, :quarantined_instances` is now covered by `:pleroma, :mrf_simple, :reject`"}
]
def check_exiftool_filter do
filters = Config.get([Pleroma.Upload]) |> Keyword.get(:filters, [])
if Pleroma.Upload.Filter.Exiftool in filters do
Logger.warning("""
!!!DEPRECATION WARNING!!!
Your config is using Exiftool as a filter instead of Exiftool.StripMetadata. This should work for now, but you are advised to change to the new configuration to prevent possible issues later:
```
config :pleroma, Pleroma.Upload,
filters: [Pleroma.Upload.Filter.Exiftool]
```
Is now
```
config :pleroma, Pleroma.Upload,
filters: [Pleroma.Upload.Filter.Exiftool.StripMetadata]
```
""")
new_config =
filters
|> Enum.map(fn
Pleroma.Upload.Filter.Exiftool -> Pleroma.Upload.Filter.Exiftool.StripMetadata
filter -> filter
end)
Config.put([Pleroma.Upload, :filters], new_config)
:error
else
:ok
end
end
def check_simple_policy_tuples do
has_strings =
Config.get([:mrf_simple])
@ -184,7 +221,8 @@ def warn do
check_simple_policy_tuples(),
check_http_adapter(),
check_uploader_base_url_set(),
check_uploader_base_url_is_not_base_domain()
check_uploader_base_url_is_not_base_domain(),
check_exiftool_filter()
]
|> Enum.reduce(:ok, fn
:ok, :ok -> :ok

View file

@ -61,12 +61,23 @@ defmodule Pleroma.Upload do
width: integer(),
height: integer(),
blurhash: String.t(),
description: String.t(),
path: String.t()
}
@always_enabled_filters [Pleroma.Upload.Filter.Dedupe]
defstruct [:id, :name, :tempfile, :content_type, :width, :height, :blurhash, :path]
defstruct [
:id,
:name,
:tempfile,
:content_type,
:width,
:height,
:blurhash,
:description,
:path
]
@spec store(source, options :: [option()]) :: {:ok, Map.t()} | {:error, any()}
@doc "Store a file. If using a `Plug.Upload{}` as the source, be sure to use `Majic.Plug` to ensure its content_type and filename is correct."
@ -76,7 +87,7 @@ def store(upload, opts \\ []) do
with {:ok, upload} <- prepare_upload(upload, opts),
upload = %__MODULE__{upload | path: upload.path || "#{upload.id}/#{upload.name}"},
{:ok, upload} <- Pleroma.Upload.Filter.filter(opts.filters, upload),
description = Map.get(opts, :description) || "",
description = Map.get(upload, :description) || "",
{_, true} <-
{:description_limit,
String.length(description) <= Pleroma.Config.get([:instance, :description_limit])},
@ -152,7 +163,8 @@ defp prepare_upload(%Plug.Upload{} = file, opts) do
id: UUID.generate(),
name: file.filename,
tempfile: file.path,
content_type: file.content_type
content_type: file.content_type,
description: opts.description
}}
end
end
@ -172,7 +184,8 @@ defp prepare_upload(%{img: "data:image/" <> image_data}, opts) do
id: UUID.generate(),
name: hash <> "." <> ext,
tempfile: tmp_path,
content_type: content_type
content_type: content_type,
description: opts.description
}}
end
end

View file

@ -0,0 +1,52 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Upload.Filter.Exiftool.ReadDescription do
@moduledoc """
Gets a valid description from the related EXIF tags and provides them in the response if no description is provided yet.
It will first check ImageDescription, when that doesn't probide a valid description, it will check iptc:Caption-Abstract.
A valid description means the fields are filled in and not too long (see `:instance, :description_limit`).
"""
@behaviour Pleroma.Upload.Filter
@spec filter(Pleroma.Upload.t()) :: {:ok, any()} | {:error, String.t()}
def filter(%Pleroma.Upload{description: description})
when is_binary(description),
do: {:ok, :noop}
def filter(%Pleroma.Upload{tempfile: file} = upload),
do: {:ok, :filtered, upload |> Map.put(:description, read_description_from_exif_data(file))}
def filter(_, _), do: {:ok, :noop}
defp read_description_from_exif_data(file) do
nil
|> read_when_empty(file, "-ImageDescription")
|> read_when_empty(file, "-iptc:Caption-Abstract")
end
defp read_when_empty(current_description, _, _) when is_binary(current_description),
do: current_description
defp read_when_empty(_, file, tag) do
try do
{tag_content, 0} =
System.cmd("exiftool", ["-b", "-s3", tag, file],
stderr_to_stdout: true,
parallelism: true
)
tag_content = String.trim(tag_content)
if tag_content != "" and
String.length(tag_content) <=
Pleroma.Config.get([:instance, :description_limit]),
do: tag_content,
else: nil
rescue
_ in ErlangError -> nil
end
end
end

View file

@ -2,7 +2,7 @@
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Upload.Filter.Exiftool do
defmodule Pleroma.Upload.Filter.Exiftool.StripMetadata do
@moduledoc """
Strips GPS related EXIF tags and overwrites the file in place.
Also strips or replaces filesystem metadata e.g., timestamps.

View file

@ -151,8 +151,7 @@ defp summary_fallback_metrics(byte_unit \\ :byte) do
# phoenix.router_dispatch.stop.duration
# pleroma.repo.query.total_time
# pleroma.repo.query.queue_time
dist_metrics =
[
dist_metrics = [
distribution("phoenix.endpoint.stop.duration.fdist",
event_name: [:phoenix, :endpoint, :stop],
measurement: :duration,

View file

@ -0,0 +1,37 @@
defmodule Pleroma.Repo.Migrations.UploadFilterExiftoolToExiftoolStripMetadata do
use Ecto.Migration
alias Pleroma.ConfigDB
def up,
do:
ConfigDB.get_by_params(%{group: :pleroma, key: Pleroma.Upload})
|> update_filtername(
Pleroma.Upload.Filter.Exiftool,
Pleroma.Upload.Filter.Exiftool.StripMetadata
)
def down,
do:
ConfigDB.get_by_params(%{group: :pleroma, key: Pleroma.Upload})
|> update_filtername(
Pleroma.Upload.Filter.Exiftool.StripMetadata,
Pleroma.Upload.Filter.Exiftool
)
defp update_filtername(%{value: value}, from_filtername, to_filtername) do
new_value =
value
|> Keyword.update(:filters, [], fn filters ->
filters
|> Enum.map(fn
^from_filtername -> to_filtername
filter -> filter
end)
end)
ConfigDB.update_or_create(%{group: :pleroma, key: Pleroma.Upload, value: new_value})
end
defp update_filtername(_, _, _), do: nil
end

Binary file not shown.

After

Width:  |  Height:  |  Size: 697 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 823 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 785 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 631 B

View file

@ -69,7 +69,9 @@ test "running gen" do
"test/uploads",
"--static-dir",
"./test/../test/instance/static/",
"--strip-uploads",
"--strip-uploads-metadata",
"y",
"--read-uploads-description",
"y",
"--anonymize-uploads",
"n"
@ -91,7 +93,10 @@ test "running gen" do
assert generated_config =~ "password: \"dbpass\""
assert generated_config =~ "configurable_from_database: true"
assert generated_config =~ "http: [ip: {127, 0, 0, 1}, port: 4000]"
assert generated_config =~ "filters: [Pleroma.Upload.Filter.Exiftool]"
assert generated_config =~
"filters: [Pleroma.Upload.Filter.Exiftool.ReadDescription, Pleroma.Upload.Filter.Exiftool.StripMetadata]"
assert generated_config =~ "base_url: \"https://media.pleroma.social/media\""
assert File.read!(tmp_path() <> "setup.psql") == generated_setup_psql()
assert File.exists?(Path.expand("./test/instance/static/robots.txt"))

View file

@ -11,6 +11,62 @@ defmodule Pleroma.Config.DeprecationWarningsTest do
alias Pleroma.Config
alias Pleroma.Config.DeprecationWarnings
describe "filter exiftool" do
test "gives warning when still used" do
clear_config(
[Pleroma.Upload, :filters],
[Pleroma.Upload.Filter.Exiftool]
)
assert capture_log(fn -> DeprecationWarnings.check_exiftool_filter() end) =~
"""
!!!DEPRECATION WARNING!!!
Your config is using Exiftool as a filter instead of Exiftool.StripMetadata. This should work for now, but you are advised to change to the new configuration to prevent possible issues later:
```
config :pleroma, Pleroma.Upload,
filters: [Pleroma.Upload.Filter.Exiftool]
```
Is now
```
config :pleroma, Pleroma.Upload,
filters: [Pleroma.Upload.Filter.Exiftool.StripMetadata]
```
"""
end
test "changes setting to exiftool strip metadata" do
clear_config(
[Pleroma.Upload, :filters],
[Pleroma.Upload.Filter.Exiftool, Pleroma.Upload.Filter.Exiftool.ReadDescription]
)
expected_config = [
Pleroma.Upload.Filter.Exiftool.StripMetadata,
Pleroma.Upload.Filter.Exiftool.ReadDescription
]
capture_log(fn -> DeprecationWarnings.warn() end)
assert Config.get([Pleroma.Upload]) |> Keyword.get(:filters, []) == expected_config
end
test "doesn't give a warning with correct config" do
clear_config(
[Pleroma.Upload, :filters],
[
Pleroma.Upload.Filter.Exiftool.StripMetadata,
Pleroma.Upload.Filter.Exiftool.ReadDescription
]
)
assert capture_log(fn -> DeprecationWarnings.check_exiftool_filter() end) == ""
end
end
describe "simple policy tuples" do
test "gives warning when there are still strings" do
clear_config([:mrf_simple],

View file

@ -0,0 +1,117 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Upload.Filter.Exiftool.ReadDescriptionTest do
use Pleroma.DataCase, async: true
alias Pleroma.Upload.Filter
@uploads %Pleroma.Upload{
name: "image_with_imagedescription_and_caption-abstract.jpg",
content_type: "image/jpeg",
path: Path.absname("test/fixtures/image_with_imagedescription_and_caption-abstract.jpg"),
tempfile: Path.absname("test/fixtures/image_with_imagedescription_and_caption-abstract.jpg"),
description: nil
}
test "keeps description when not empty" do
uploads = %Pleroma.Upload{
name: "image_with_imagedescription_and_caption-abstract.jpg",
content_type: "image/jpeg",
path: Path.absname("test/fixtures/image_with_imagedescription_and_caption-abstract.jpg"),
tempfile:
Path.absname("test/fixtures/image_with_imagedescription_and_caption-abstract.jpg"),
description: "Some description"
}
assert Filter.Exiftool.ReadDescription.filter(uploads) ==
{:ok, :noop}
end
test "otherwise returns ImageDescription when present" do
uploads_after = %Pleroma.Upload{
name: "image_with_imagedescription_and_caption-abstract.jpg",
content_type: "image/jpeg",
path: Path.absname("test/fixtures/image_with_imagedescription_and_caption-abstract.jpg"),
tempfile:
Path.absname("test/fixtures/image_with_imagedescription_and_caption-abstract.jpg"),
description: "a descriptive white pixel"
}
assert Filter.Exiftool.ReadDescription.filter(@uploads) ==
{:ok, :filtered, uploads_after}
end
test "otherwise returns iptc:Caption-Abstract when present" do
upload = %Pleroma.Upload{
name: "image_with_caption-abstract.jpg",
content_type: "image/jpeg",
path: Path.absname("test/fixtures/image_with_caption-abstract.jpg"),
tempfile: Path.absname("test/fixtures/image_with_caption-abstract.jpg"),
description: nil
}
upload_after = %Pleroma.Upload{
name: "image_with_caption-abstract.jpg",
content_type: "image/jpeg",
path: Path.absname("test/fixtures/image_with_caption-abstract.jpg"),
tempfile: Path.absname("test/fixtures/image_with_caption-abstract.jpg"),
description: "an abstract white pixel"
}
assert Filter.Exiftool.ReadDescription.filter(upload) ==
{:ok, :filtered, upload_after}
end
test "otherwise returns nil" do
uploads = %Pleroma.Upload{
name: "image_with_no_description.jpg",
content_type: "image/jpeg",
path: Path.absname("test/fixtures/image_with_no_description.jpg"),
tempfile: Path.absname("test/fixtures/image_with_no_description.jpg"),
description: nil
}
assert Filter.Exiftool.ReadDescription.filter(uploads) ==
{:ok, :filtered, uploads}
end
test "Return nil when image description from EXIF data exceeds the maximum length" do
clear_config([:instance, :description_limit], 5)
assert Filter.Exiftool.ReadDescription.filter(@uploads) ==
{:ok, :filtered, @uploads}
end
test "Ignores content with only whitespace" do
uploads = %Pleroma.Upload{
name: "non-existant.jpg",
content_type: "image/jpeg",
path:
Path.absname(
"test/fixtures/image_with_imagedescription_and_caption-abstract_whitespaces.jpg"
),
tempfile:
Path.absname(
"test/fixtures/image_with_imagedescription_and_caption-abstract_whitespaces.jpg"
),
description: nil
}
assert Filter.Exiftool.ReadDescription.filter(uploads) ==
{:ok, :filtered, uploads}
end
test "Return nil when image description from EXIF data can't be read" do
uploads = %Pleroma.Upload{
name: "non-existant.jpg",
content_type: "image/jpeg",
path: Path.absname("test/fixtures/non-existant.jpg"),
tempfile: Path.absname("test/fixtures/non-existant_tmp.jpg"),
description: nil
}
assert Filter.Exiftool.ReadDescription.filter(uploads) ==
{:ok, :filtered, uploads}
end
end

View file

@ -2,7 +2,7 @@
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Upload.Filter.ExiftoolTest do
defmodule Pleroma.Upload.Filter.Exiftool.StripMetadataTest do
use Pleroma.DataCase
alias Pleroma.Upload.Filter
@ -21,7 +21,7 @@ test "apply exiftool filter" do
tempfile: Path.absname("test/fixtures/DSCN0010_tmp.jpg")
}
assert Filter.Exiftool.filter(upload) == {:ok, :filtered}
assert Filter.Exiftool.StripMetadata.filter(upload) == {:ok, :filtered}
{exif_original, 0} = System.cmd("exiftool", ["test/fixtures/DSCN0010.jpg"])
{exif_filtered, 0} = System.cmd("exiftool", ["test/fixtures/DSCN0010_tmp.jpg"])
@ -37,6 +37,6 @@ test "verify webp files are skipped" do
content_type: "image/webp"
}
assert Filter.Exiftool.filter(upload) == {:ok, :noop}
assert Filter.Exiftool.StripMetadata.filter(upload) == {:ok, :noop}
end
end

View file

@ -12,8 +12,7 @@ defmodule Pleroma.Web.ActivityPub.ObjectValidators.AttachmentValidatorTest do
describe "attachments" do
test "works with apng" do
attachment =
%{
attachment = %{
"mediaType" => "image/apng",
"name" => "",
"type" => "Document",

View file

@ -559,8 +559,7 @@ def undo_activity_factory(attrs \\ %{}) do
like_activity = attrs[:like_activity] || insert(:like_activity)
attrs = Map.drop(attrs, [:like_activity])
data =
%{
data = %{
"id" => Pleroma.Web.ActivityPub.Utils.generate_activity_id(),
"type" => "Undo",
"actor" => like_activity.data["actor"],