From 5ee0fb18cb28ef9917a3b1769abb8877ed317e87 Mon Sep 17 00:00:00 2001 From: Oneric Date: Thu, 18 Apr 2024 18:05:29 +0000 Subject: [PATCH] exiftool: make stripped tags configurable --- CHANGELOG.md | 4 +- config/description.exs | 20 +++ docs/docs/configuration/cheatsheet.md | 3 +- .../upload/filter/exiftool/strip_metadata.ex | 36 +++--- .../filter/exiftool/strip_location_test.exs | 116 +++++++++++++++++- 5 files changed, 157 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 723c1060f..578d02964 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## Changed - Inbound pipeline error handing was modified somewhat, which should lead to less incomprehensible log spam. Hopefully. -- Uploadfilter `Pleroma.Upload.Filter.Exiftool` has been renamed to `Pleroma.Upload.Filter.Exiftool.StripMetadata` +- Uploadfilter `Pleroma.Upload.Filter.Exiftool` was replaced by `Pleroma.Upload.Filter.Exiftool.StripMetadata`; + the latter strips all non-essential metadata by default but can be configured. + To regain the old behaviour of only stripping GPS data set `purge: ["gps:all"]`. ## Fixed - Issue preventing fetching anything from IPv6-only instances diff --git a/config/description.exs b/config/description.exs index ec5050be6..d9c90edd7 100644 --- a/config/description.exs +++ b/config/description.exs @@ -222,6 +222,26 @@ } ] }, + %{ + group: :pleroma, + key: Pleroma.Upload.Filter.Exiftool.StripMetadata, + type: :group, + description: "Strip specified metadata from image uploads", + children: [ + %{ + key: :purge, + description: "Metadata fields or groups to strip", + type: {:list, :string}, + suggestions: ["all", "CommonIFD0"] + }, + %{ + key: :preserve, + description: "Metadata fields or groups to preserve (takes precedence over stripping)", + type: {:list, :string}, + suggestions: ["ColorSpaces", "Orientation"] + } + ] + }, %{ group: :pleroma, key: Pleroma.Emails.Mailer, diff --git a/docs/docs/configuration/cheatsheet.md b/docs/docs/configuration/cheatsheet.md index 4eee3f206..3b5dfa41e 100644 --- a/docs/docs/configuration/cheatsheet.md +++ b/docs/docs/configuration/cheatsheet.md @@ -658,7 +658,8 @@ This filter replaces the declared filename (not the path) of an upload. This filter strips metadata with Exiftool leaving color profiles and orientation intact. -No specific configuration. +* `purge`: List of Exiftool tag names or tag group names to purge +* `preserve`: List of Exiftool tag names or tag group names to preserve even if they occur in the purge list #### Pleroma.Upload.Filter.Exiftool.ReadDescription diff --git a/lib/pleroma/upload/filter/exiftool/strip_metadata.ex b/lib/pleroma/upload/filter/exiftool/strip_metadata.ex index 9173b2a06..912ff6a92 100644 --- a/lib/pleroma/upload/filter/exiftool/strip_metadata.ex +++ b/lib/pleroma/upload/filter/exiftool/strip_metadata.ex @@ -9,6 +9,11 @@ defmodule Pleroma.Upload.Filter.Exiftool.StripMetadata do """ @behaviour Pleroma.Upload.Filter + alias Pleroma.Config + + @purge_default ["all", "CommonIFD0"] + @preserve_default ["ColorSpaceTags", "Orientation"] + @spec filter(Pleroma.Upload.t()) :: {:ok, :noop} | {:ok, :filtered} | {:error, String.t()} # Formats not compatible with exiftool at this time @@ -16,22 +21,23 @@ def filter(%Pleroma.Upload{content_type: "image/webp"}), do: {:ok, :noop} def filter(%Pleroma.Upload{content_type: "image/svg+xml"}), do: {:ok, :noop} def filter(%Pleroma.Upload{tempfile: file, content_type: "image" <> _}) do + purge_args = + Config.get([__MODULE__, :purge], @purge_default) + |> Enum.map(fn mgroup -> "-" <> mgroup <> "=" end) + + preserve_args = + Config.get([__MODULE__, :preserve], @preserve_default) + |> Enum.map(fn mgroup -> "-" <> mgroup end) + |> then(fn + # If -TagsFromFile is not followed by tag selectors, it will copy most available tags + [] -> [] + args -> ["-TagsFromFile", "@" | args] + end) + + args = ["-ignoreMinorErrors", "-overwrite_original" | purge_args] ++ preserve_args ++ [file] + try do - case System.cmd( - "exiftool", - [ - "-ignoreMinorErrors", - "-overwrite_original", - "-all=", - "-CommonIFD0=", - "-TagsFromFile", - "@", - "-ColorSpaceTags", - "-Orientation", - file - ], - parallelism: true - ) do + case System.cmd("exiftool", args, parallelism: true) do {_response, 0} -> {:ok, :filtered} {error, 1} -> {:error, error} end diff --git a/test/pleroma/upload/filter/exiftool/strip_location_test.exs b/test/pleroma/upload/filter/exiftool/strip_location_test.exs index 6f8178115..2e017cd7e 100644 --- a/test/pleroma/upload/filter/exiftool/strip_location_test.exs +++ b/test/pleroma/upload/filter/exiftool/strip_location_test.exs @@ -6,29 +6,104 @@ defmodule Pleroma.Upload.Filter.Exiftool.StripMetadataTest do use Pleroma.DataCase alias Pleroma.Upload.Filter - test "apply exiftool filter" do + @tag :tmp_dir + test "exiftool strip metadata strips GPS etc but preserves Orientation and ColorSpace by default", + %{tmp_dir: tmp_dir} do assert Pleroma.Utils.command_available?("exiftool") + tmpfile = Path.join(tmp_dir, "tmp.jpg") + File.cp!( "test/fixtures/DSCN0010.jpg", - "test/fixtures/DSCN0010_tmp.jpg" + tmpfile ) upload = %Pleroma.Upload{ name: "image_with_GPS_data.jpg", content_type: "image/jpeg", path: Path.absname("test/fixtures/DSCN0010.jpg"), - tempfile: Path.absname("test/fixtures/DSCN0010_tmp.jpg") + tempfile: Path.absname(tmpfile) } assert Filter.Exiftool.StripMetadata.filter(upload) == {:ok, :filtered} - {exif_original, 0} = System.cmd("exiftool", ["test/fixtures/DSCN0010.jpg"]) - {exif_filtered, 0} = System.cmd("exiftool", ["test/fixtures/DSCN0010_tmp.jpg"]) + exif_original = read_exif("test/fixtures/DSCN0010.jpg") + exif_filtered = read_exif(tmpfile) refute exif_original == exif_filtered assert String.match?(exif_original, ~r/GPS/) refute String.match?(exif_filtered, ~r/GPS/) + assert String.match?(exif_original, ~r/Camera Model Name/) + refute String.match?(exif_filtered, ~r/Camera Model Name/) + assert String.match?(exif_original, ~r/Orientation/) + assert String.match?(exif_filtered, ~r/Orientation/) + assert String.match?(exif_original, ~r/Color Space/) + assert String.match?(exif_filtered, ~r/Color Space/) + end + + # this is a nonsensical configuration, but it shouldn't explode + @tag :tmp_dir + test "exiftool strip metadata is a noop with empty purge list", %{tmp_dir: tmp_dir} do + assert Pleroma.Utils.command_available?("exiftool") + clear_config([Pleroma.Upload.Filter.Exiftool.StripMetadata, :purge], []) + + tmpfile = Path.join(tmp_dir, "tmp.jpg") + + File.cp!( + "test/fixtures/DSCN0010.jpg", + tmpfile + ) + + upload = %Pleroma.Upload{ + name: "image_with_GPS_data.jpg", + content_type: "image/jpeg", + path: Path.absname("test/fixtures/DSCN0010.jpg"), + tempfile: Path.absname(tmpfile) + } + + assert Filter.Exiftool.StripMetadata.filter(upload) == {:ok, :filtered} + + exif_original = read_exif("test/fixtures/DSCN0010.jpg") + exif_filtered = read_exif(tmpfile) + + assert exif_original == exif_filtered + end + + @tag :tmp_dir + test "exiftool strip metadata works with empty preserve list", %{tmp_dir: tmp_dir} do + assert Pleroma.Utils.command_available?("exiftool") + clear_config([Pleroma.Upload.Filter.Exiftool.StripMetadata, :preserve], []) + + tmpfile = Path.join(tmp_dir, "tmp.jpg") + + File.cp!( + "test/fixtures/DSCN0010.jpg", + tmpfile + ) + + upload = %Pleroma.Upload{ + name: "image_with_GPS_data.jpg", + content_type: "image/jpeg", + path: Path.absname("test/fixtures/DSCN0010.jpg"), + tempfile: Path.absname(tmpfile) + } + + write_exif(["-ImageDescription=Trees and Houses", "-Orientation=1", tmpfile]) + exif_extended = read_exif(tmpfile) + assert String.match?(exif_extended, ~r/Image Description[ \t]*:[ \t]*Trees and Houses/) + assert String.match?(exif_extended, ~r/Orientation/) + + assert Filter.Exiftool.StripMetadata.filter(upload) == {:ok, :filtered} + + exif_original = read_exif("test/fixtures/DSCN0010.jpg") + exif_filtered = read_exif(tmpfile) + + refute exif_original == exif_filtered + refute exif_extended == exif_filtered + assert String.match?(exif_original, ~r/GPS/) + refute String.match?(exif_filtered, ~r/GPS/) + refute String.match?(exif_filtered, ~r/Image Description/) + refute String.match?(exif_filtered, ~r/Orientation/) end test "verify webp files are skipped" do @@ -39,4 +114,35 @@ test "verify webp files are skipped" do assert Filter.Exiftool.StripMetadata.filter(upload) == {:ok, :noop} end + + test "verify svg files are skipped" do + upload = %Pleroma.Upload{ + name: "sample.svg", + content_type: "image/svg+xml" + } + + assert Filter.Exiftool.StripMetadata.filter(upload) == {:ok, :noop} + end + + defp read_exif(file) do + # time and file path tags cause mismatches even for byte-identical files + {exif_data, 0} = + System.cmd("exiftool", [ + "-x", + "Time:All", + "-x", + "Directory", + "-x", + "FileName", + "-x", + "FileSize", + file + ]) + + exif_data + end + + defp write_exif(args) do + {_response, 0} = System.cmd("exiftool", ["-ignoreMinorErrors", "-overwrite_original" | args]) + end end