From 24e608ab5b71fd92a0a02754c91d2c666c3cfef9 Mon Sep 17 00:00:00 2001 From: Oneric Date: Mon, 15 Apr 2024 23:28:00 +0200 Subject: [PATCH 1/5] docs: fix typo --- docs/docs/installation/optional/media_graphics_packages.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/docs/installation/optional/media_graphics_packages.md b/docs/docs/installation/optional/media_graphics_packages.md index d79ac2a07..922635fd0 100644 --- a/docs/docs/installation/optional/media_graphics_packages.md +++ b/docs/docs/installation/optional/media_graphics_packages.md @@ -14,7 +14,7 @@ Note: the packages are not required with the current default settings of Akkoma. `ImageMagick` is a set of tools to create, edit, compose, or convert bitmap images. It is required for the following Akkoma features: - * `Pleroma.Upload.Filters.Mogrify`, `Pleroma.Upload.Filters.Mogrifun` upload filters (related config: `Plaroma.Upload/filters` in `config/config.exs`) + * `Pleroma.Upload.Filters.Mogrify`, `Pleroma.Upload.Filters.Mogrifun` upload filters (related config: `Pleroma.Upload/filters` in `config/config.exs`) * Media preview proxy for still images (related config: `media_preview_proxy/enabled` in `config/config.exs`) ## `ffmpeg` @@ -29,5 +29,5 @@ It is required for the following Akkoma features: `exiftool` is media files metadata reader/writer. 
It is required for the following Akkoma features: - * `Pleroma.Upload.Filters.Exiftool.StripMetadata` upload filter (related config: `Plaroma.Upload/filters` in `config/config.exs`) - * `Pleroma.Upload.Filters.Exiftool.ReadDescription` upload filter (related config: `Plaroma.Upload/filters` in `config/config.exs`) + * `Pleroma.Upload.Filters.Exiftool.StripMetadata` upload filter (related config: `Pleroma.Upload/filters` in `config/config.exs`) + * `Pleroma.Upload.Filters.Exiftool.ReadDescription` upload filter (related config: `Pleroma.Upload/filters` in `config/config.exs`) From 163cb1d5e004cfc49e041ba0cd70b83b5a36f25d Mon Sep 17 00:00:00 2001 From: Oneric Date: Mon, 15 Apr 2024 23:32:10 +0200 Subject: [PATCH 2/5] exiftool: strip JXL and HEIC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As of exiftool 12.57 both formats are supported, but EXIF data is optional for JXL and if exiftool doesn’t find a preexisting metadata chunk it will create one and treat it as a minor error resulting in a non-zero exit code. Setting -ignoreMinorErrors avoids failing on such uploads. 
--- lib/pleroma/upload/filter/exiftool/strip_metadata.ex | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/pleroma/upload/filter/exiftool/strip_metadata.ex b/lib/pleroma/upload/filter/exiftool/strip_metadata.ex index 178b595f3..38a1cf7e2 100644 --- a/lib/pleroma/upload/filter/exiftool/strip_metadata.ex +++ b/lib/pleroma/upload/filter/exiftool/strip_metadata.ex @@ -12,14 +12,16 @@ defmodule Pleroma.Upload.Filter.Exiftool.StripMetadata do @spec filter(Pleroma.Upload.t()) :: {:ok, :noop} | {:ok, :filtered} | {:error, String.t()} # Formats not compatible with exiftool at this time - def filter(%Pleroma.Upload{content_type: "image/heic"}), do: {:ok, :noop} def filter(%Pleroma.Upload{content_type: "image/webp"}), do: {:ok, :noop} def filter(%Pleroma.Upload{content_type: "image/svg+xml"}), do: {:ok, :noop} - def filter(%Pleroma.Upload{content_type: "image/jxl"}), do: {:ok, :noop} def filter(%Pleroma.Upload{tempfile: file, content_type: "image" <> _}) do try do - case System.cmd("exiftool", ["-overwrite_original", "-gps:all=", file], parallelism: true) do + case System.cmd( + "exiftool", + ["-ignoreMinorErrors", "-overwrite_original", "-gps:all=", file], + parallelism: true + ) do {_response, 0} -> {:ok, :filtered} {error, 1} -> {:error, error} end From a95af3ee4c7f14f650ed77fe94580d5ea9df8901 Mon Sep 17 00:00:00 2001 From: Oneric Date: Mon, 15 Apr 2024 23:49:01 +0200 Subject: [PATCH 3/5] exiftool: strip all non-essential tags Documentation was already clear on this only stripping GPS tags. But there are more potentially sensitive metadata tags (e.g. author and possibly description) and the name alone suggests a broader effect. Thus change the filter to strip all metadata except for colourspace info and orientation (technically it strips everything and then readds selected tags). Explicitly stripping CommonIFD0 is needed since -all does not modify IFD0 due to TIFF storing some actual image data there. 
CommonIFD0 then strips a bunch of commonly used actual metadata tags from IFD0, to my understanding leaving TIFF image data and custom metadata tags intact. --- docs/docs/administration/CLI_tasks/instance.md | 2 +- docs/docs/configuration/cheatsheet.md | 2 +- lib/mix/tasks/pleroma/instance.ex | 4 ++-- .../upload/filter/exiftool/strip_metadata.ex | 14 ++++++++++++-- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/docs/docs/administration/CLI_tasks/instance.md b/docs/docs/administration/CLI_tasks/instance.md index 1a3f8153e..3d30d1119 100644 --- a/docs/docs/administration/CLI_tasks/instance.md +++ b/docs/docs/administration/CLI_tasks/instance.md @@ -37,7 +37,7 @@ If any of the options are left unspecified, you will be prompted interactively. - `--static-dir ` - the directory custom public files should be read from (custom emojis, frontend bundle overrides, robots.txt, etc.) - `--listen-ip ` - the ip the app should listen to, defaults to 127.0.0.1 - `--listen-port ` - the port the app should listen to, defaults to 4000 -- `--strip-uploads-metadata ` - use ExifTool to strip uploads of sensitive metadata +- `--strip-uploads-metadata ` - use ExifTool to strip uploads of metadata when possible - `--read-uploads-description ` - use ExifTool to read image descriptions from uploads - `--anonymize-uploads ` - randomize uploaded filenames - `--dedupe-uploads ` - store files based on their hash to reduce data storage requirements if duplicates are uploaded with different filenames diff --git a/docs/docs/configuration/cheatsheet.md b/docs/docs/configuration/cheatsheet.md index d2b699a51..4eee3f206 100644 --- a/docs/docs/configuration/cheatsheet.md +++ b/docs/docs/configuration/cheatsheet.md @@ -656,7 +656,7 @@ This filter replaces the declared filename (not the path) of an upload. #### Pleroma.Upload.Filter.Exiftool.StripMetadata -This filter only strips the GPS and location metadata with Exiftool leaving color profiles and attributes intact. 
+This filter strips metadata with Exiftool leaving color profiles and orientation intact. No specific configuration. diff --git a/lib/mix/tasks/pleroma/instance.ex b/lib/mix/tasks/pleroma/instance.ex index c02efabae..72f4623ce 100644 --- a/lib/mix/tasks/pleroma/instance.ex +++ b/lib/mix/tasks/pleroma/instance.ex @@ -172,10 +172,10 @@ def run(["gen" | rest]) do {strip_uploads_metadata_message, strip_uploads_metadata_default} = if Pleroma.Utils.command_available?("exiftool") do - {"Do you want to strip location (GPS) data from uploaded images? This requires exiftool, it was detected as installed. (y/n)", + {"Do you want to strip metadata from uploaded images? This requires exiftool, it was detected as installed. (y/n)", "y"} else - {"Do you want to strip location (GPS) data from uploaded images? This requires exiftool, it was detected as not installed, please install it if you answer yes. (y/n)", + {"Do you want to strip metadata from uploaded images? This requires exiftool, it was detected as not installed, please install it if you answer yes. (y/n)", "n"} end diff --git a/lib/pleroma/upload/filter/exiftool/strip_metadata.ex b/lib/pleroma/upload/filter/exiftool/strip_metadata.ex index 38a1cf7e2..9173b2a06 100644 --- a/lib/pleroma/upload/filter/exiftool/strip_metadata.ex +++ b/lib/pleroma/upload/filter/exiftool/strip_metadata.ex @@ -4,7 +4,7 @@ defmodule Pleroma.Upload.Filter.Exiftool.StripMetadata do @moduledoc """ - Strips GPS related EXIF tags and overwrites the file in place. + Tries to strip all image metadata but colorspace and orientation overwriting the file in place. Also strips or replaces filesystem metadata e.g., timestamps. 
""" @behaviour Pleroma.Upload.Filter @@ -19,7 +19,17 @@ def filter(%Pleroma.Upload{tempfile: file, content_type: "image" <> _}) do try do case System.cmd( "exiftool", - ["-ignoreMinorErrors", "-overwrite_original", "-gps:all=", file], + [ + "-ignoreMinorErrors", + "-overwrite_original", + "-all=", + "-CommonIFD0=", + "-TagsFromFile", + "@", + "-ColorSpaceTags", + "-Orientation", + file + ], parallelism: true ) do {_response, 0} -> {:ok, :filtered} From 5ee0fb18cb28ef9917a3b1769abb8877ed317e87 Mon Sep 17 00:00:00 2001 From: Oneric Date: Thu, 18 Apr 2024 18:05:29 +0000 Subject: [PATCH 4/5] exiftool: make stripped tags configurable --- CHANGELOG.md | 4 +- config/description.exs | 20 +++ docs/docs/configuration/cheatsheet.md | 3 +- .../upload/filter/exiftool/strip_metadata.ex | 36 +++--- .../filter/exiftool/strip_location_test.exs | 116 +++++++++++++++++- 5 files changed, 157 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 723c1060f..578d02964 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## Changed - Inbound pipeline error handing was modified somewhat, which should lead to less incomprehensible log spam. Hopefully. -- Uploadfilter `Pleroma.Upload.Filter.Exiftool` has been renamed to `Pleroma.Upload.Filter.Exiftool.StripMetadata` +- Uploadfilter `Pleroma.Upload.Filter.Exiftool` was replaced by `Pleroma.Upload.Filter.Exiftool.StripMetadata`; + the latter strips all non-essential metadata by default but can be configured. + To regain the old behaviour of only stripping GPS data set `purge: ["gps:all"]`. 
## Fixed - Issue preventing fetching anything from IPv6-only instances diff --git a/config/description.exs b/config/description.exs index ec5050be6..d9c90edd7 100644 --- a/config/description.exs +++ b/config/description.exs @@ -222,6 +222,26 @@ } ] }, + %{ + group: :pleroma, + key: Pleroma.Upload.Filter.Exiftool.StripMetadata, + type: :group, + description: "Strip specified metadata from image uploads", + children: [ + %{ + key: :purge, + description: "Metadata fields or groups to strip", + type: {:list, :string}, + suggestions: ["all", "CommonIFD0"] + }, + %{ + key: :preserve, + description: "Metadata fields or groups to preserve (takes precedence over stripping)", + type: {:list, :string}, + suggestions: ["ColorSpaceTags", "Orientation"] + } + ] + }, %{ group: :pleroma, key: Pleroma.Emails.Mailer, diff --git a/docs/docs/configuration/cheatsheet.md b/docs/docs/configuration/cheatsheet.md index 4eee3f206..3b5dfa41e 100644 --- a/docs/docs/configuration/cheatsheet.md +++ b/docs/docs/configuration/cheatsheet.md @@ -658,7 +658,8 @@ This filter replaces the declared filename (not the path) of an upload. This filter strips metadata with Exiftool leaving color profiles and orientation intact. -No specific configuration. 
+* `purge`: List of Exiftool tag names or tag group names to purge +* `preserve`: List of Exiftool tag names or tag group names to preserve even if they occur in the purge list #### Pleroma.Upload.Filter.Exiftool.ReadDescription diff --git a/lib/pleroma/upload/filter/exiftool/strip_metadata.ex b/lib/pleroma/upload/filter/exiftool/strip_metadata.ex index 9173b2a06..912ff6a92 100644 --- a/lib/pleroma/upload/filter/exiftool/strip_metadata.ex +++ b/lib/pleroma/upload/filter/exiftool/strip_metadata.ex @@ -9,6 +9,11 @@ defmodule Pleroma.Upload.Filter.Exiftool.StripMetadata do """ @behaviour Pleroma.Upload.Filter + alias Pleroma.Config + + @purge_default ["all", "CommonIFD0"] + @preserve_default ["ColorSpaceTags", "Orientation"] + @spec filter(Pleroma.Upload.t()) :: {:ok, :noop} | {:ok, :filtered} | {:error, String.t()} # Formats not compatible with exiftool at this time @@ -16,22 +21,23 @@ def filter(%Pleroma.Upload{content_type: "image/webp"}), do: {:ok, :noop} def filter(%Pleroma.Upload{content_type: "image/svg+xml"}), do: {:ok, :noop} def filter(%Pleroma.Upload{tempfile: file, content_type: "image" <> _}) do + purge_args = + Config.get([__MODULE__, :purge], @purge_default) + |> Enum.map(fn mgroup -> "-" <> mgroup <> "=" end) + + preserve_args = + Config.get([__MODULE__, :preserve], @preserve_default) + |> Enum.map(fn mgroup -> "-" <> mgroup end) + |> then(fn + # If -TagsFromFile is not followed by tag selectors, it will copy most available tags + [] -> [] + args -> ["-TagsFromFile", "@" | args] + end) + + args = ["-ignoreMinorErrors", "-overwrite_original" | purge_args] ++ preserve_args ++ [file] + try do - case System.cmd( - "exiftool", - [ - "-ignoreMinorErrors", - "-overwrite_original", - "-all=", - "-CommonIFD0=", - "-TagsFromFile", - "@", - "-ColorSpaceTags", - "-Orientation", - file - ], - parallelism: true - ) do + case System.cmd("exiftool", args, parallelism: true) do {_response, 0} -> {:ok, :filtered} {error, 1} -> {:error, error} end diff --git 
a/test/pleroma/upload/filter/exiftool/strip_location_test.exs b/test/pleroma/upload/filter/exiftool/strip_location_test.exs index 6f8178115..2e017cd7e 100644 --- a/test/pleroma/upload/filter/exiftool/strip_location_test.exs +++ b/test/pleroma/upload/filter/exiftool/strip_location_test.exs @@ -6,29 +6,104 @@ defmodule Pleroma.Upload.Filter.Exiftool.StripMetadataTest do use Pleroma.DataCase alias Pleroma.Upload.Filter - test "apply exiftool filter" do + @tag :tmp_dir + test "exiftool strip metadata strips GPS etc but preserves Orientation and ColorSpace by default", + %{tmp_dir: tmp_dir} do assert Pleroma.Utils.command_available?("exiftool") + tmpfile = Path.join(tmp_dir, "tmp.jpg") + File.cp!( "test/fixtures/DSCN0010.jpg", - "test/fixtures/DSCN0010_tmp.jpg" + tmpfile ) upload = %Pleroma.Upload{ name: "image_with_GPS_data.jpg", content_type: "image/jpeg", path: Path.absname("test/fixtures/DSCN0010.jpg"), - tempfile: Path.absname("test/fixtures/DSCN0010_tmp.jpg") + tempfile: Path.absname(tmpfile) } assert Filter.Exiftool.StripMetadata.filter(upload) == {:ok, :filtered} - {exif_original, 0} = System.cmd("exiftool", ["test/fixtures/DSCN0010.jpg"]) - {exif_filtered, 0} = System.cmd("exiftool", ["test/fixtures/DSCN0010_tmp.jpg"]) + exif_original = read_exif("test/fixtures/DSCN0010.jpg") + exif_filtered = read_exif(tmpfile) refute exif_original == exif_filtered assert String.match?(exif_original, ~r/GPS/) refute String.match?(exif_filtered, ~r/GPS/) + assert String.match?(exif_original, ~r/Camera Model Name/) + refute String.match?(exif_filtered, ~r/Camera Model Name/) + assert String.match?(exif_original, ~r/Orientation/) + assert String.match?(exif_filtered, ~r/Orientation/) + assert String.match?(exif_original, ~r/Color Space/) + assert String.match?(exif_filtered, ~r/Color Space/) + end + + # this is a nonsensical configuration, but it shouldn't explode + @tag :tmp_dir + test "exiftool strip metadata is a noop with empty purge list", %{tmp_dir: tmp_dir} do + assert 
Pleroma.Utils.command_available?("exiftool") + clear_config([Pleroma.Upload.Filter.Exiftool.StripMetadata, :purge], []) + + tmpfile = Path.join(tmp_dir, "tmp.jpg") + + File.cp!( + "test/fixtures/DSCN0010.jpg", + tmpfile + ) + + upload = %Pleroma.Upload{ + name: "image_with_GPS_data.jpg", + content_type: "image/jpeg", + path: Path.absname("test/fixtures/DSCN0010.jpg"), + tempfile: Path.absname(tmpfile) + } + + assert Filter.Exiftool.StripMetadata.filter(upload) == {:ok, :filtered} + + exif_original = read_exif("test/fixtures/DSCN0010.jpg") + exif_filtered = read_exif(tmpfile) + + assert exif_original == exif_filtered + end + + @tag :tmp_dir + test "exiftool strip metadata works with empty preserve list", %{tmp_dir: tmp_dir} do + assert Pleroma.Utils.command_available?("exiftool") + clear_config([Pleroma.Upload.Filter.Exiftool.StripMetadata, :preserve], []) + + tmpfile = Path.join(tmp_dir, "tmp.jpg") + + File.cp!( + "test/fixtures/DSCN0010.jpg", + tmpfile + ) + + upload = %Pleroma.Upload{ + name: "image_with_GPS_data.jpg", + content_type: "image/jpeg", + path: Path.absname("test/fixtures/DSCN0010.jpg"), + tempfile: Path.absname(tmpfile) + } + + write_exif(["-ImageDescription=Trees and Houses", "-Orientation=1", tmpfile]) + exif_extended = read_exif(tmpfile) + assert String.match?(exif_extended, ~r/Image Description[ \t]*:[ \t]*Trees and Houses/) + assert String.match?(exif_extended, ~r/Orientation/) + + assert Filter.Exiftool.StripMetadata.filter(upload) == {:ok, :filtered} + + exif_original = read_exif("test/fixtures/DSCN0010.jpg") + exif_filtered = read_exif(tmpfile) + + refute exif_original == exif_filtered + refute exif_extended == exif_filtered + assert String.match?(exif_original, ~r/GPS/) + refute String.match?(exif_filtered, ~r/GPS/) + refute String.match?(exif_filtered, ~r/Image Description/) + refute String.match?(exif_filtered, ~r/Orientation/) end test "verify webp files are skipped" do @@ -39,4 +114,35 @@ test "verify webp files are skipped" do assert 
Filter.Exiftool.StripMetadata.filter(upload) == {:ok, :noop} end + + test "verify svg files are skipped" do + upload = %Pleroma.Upload{ + name: "sample.svg", + content_type: "image/svg+xml" + } + + assert Filter.Exiftool.StripMetadata.filter(upload) == {:ok, :noop} + end + + defp read_exif(file) do + # time and file path tags cause mismatches even for byte-identical files + {exif_data, 0} = + System.cmd("exiftool", [ + "-x", + "Time:All", + "-x", + "Directory", + "-x", + "FileName", + "-x", + "FileSize", + file + ]) + + exif_data + end + + defp write_exif(args) do + {_response, 0} = System.cmd("exiftool", ["-ignoreMinorErrors", "-overwrite_original" | args]) + end end From 5bc64c57534736eaa3da910916d2e5fdf6c385d9 Mon Sep 17 00:00:00 2001 From: Oneric Date: Fri, 26 Apr 2024 00:01:13 +0200 Subject: [PATCH 5/5] changelog: add note about StripMetadata and ReadDescription order --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 578d02964..49ee8f6f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Support for [FEP-fffd](https://codeberg.org/fediverse/fep/src/branch/main/fep/fffd/fep-fffd.md) (proxy objects) - Verified support for elixir 1.16 - Uploadfilter `Pleroma.Upload.Filter.Exiftool.ReadDescription` returns description values to the FE so they can pre fill the image description field + NOTE: this filter MUST be placed before `Exiftool.StripMetadata` to work ## Changed - Inbound pipeline error handing was modified somewhat, which should lead to less incomprehensible log spam. Hopefully.