From b0a46c1e2eaaf2b29b5764eafe8494fe2a26e486 Mon Sep 17 00:00:00 2001 From: Oneric Date: Tue, 23 Jan 2024 20:10:01 +0100 Subject: [PATCH 1/8] Normalise public adressing to fix federation Due to JSON-LD compaction the full address of public scope may also occur in shorter forms and the spec requires us to treat them all equivalently. To save us the pain of repeatedly checking for all variants internally, normalise inbound data to just one form. See note at: https://www.w3.org/TR/activitypub/#public-addressing This needs to happen very early, even before the other addressing fixes else an earlier validator will reject the object. This in turn required to move the list-tpye normalisation earlier as well, but since I was unsure about putting empty lists into the data when no such field existed before, I excluded this case and thus the later fixing had to be kept as well. Fixes: https://akkoma.dev/AkkomaGang/akkoma/issues/670 --- CHANGELOG.md | 2 + .../web/activity_pub/transmogrifier.ex | 209 +++++++++++------- test/pleroma/web/federator_test.exs | 31 +++ 3 files changed, 162 insertions(+), 80 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 723c1060f..6a42edffc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## Fixed - Issue preventing fetching anything from IPv6-only instances - Issue allowing post content to leak via opengraph tags despite :estrict\_unauthenticated being set +- Scope misdetection of remote posts resulting from not recognising + JSON-LD-compacted forms of public scope; affected e.g. federation with bovine ## 2024.03 diff --git a/lib/pleroma/web/activity_pub/transmogrifier.ex b/lib/pleroma/web/activity_pub/transmogrifier.ex index 033fc9e78..065df5150 100644 --- a/lib/pleroma/web/activity_pub/transmogrifier.ex +++ b/lib/pleroma/web/activity_pub/transmogrifier.ex @@ -58,21 +58,48 @@ def fix_summary(%{"summary" => _} = object) do def fix_summary(object), do: Map.put(object, "summary", "") - def fix_addressing_list(map, field) do - addrs = map[field] - + defp fix_addressing_list(addrs) do cond do - is_list(addrs) -> - Map.put(map, field, Enum.filter(addrs, &is_binary/1)) - - is_binary(addrs) -> - Map.put(map, field, [addrs]) - - true -> - Map.put(map, field, []) + is_list(addrs) -> Enum.filter(addrs, &is_binary/1) + is_binary(addrs) -> [addrs] + true -> [] end end + # Due to JSON-LD simply "Public" and "as:Public" are equivalent to the full URI + # but to simplify later checks we only want to deal with one reperesentation internally + defp normalise_addressing_public_list(map, all_fields) + + defp normalise_addressing_public_list(%{} = map, [field | fields]) do + full_uri = Pleroma.Constants.as_public() + + map = + if map[field] != nil do + new_fval = + map[field] + |> fix_addressing_list() + |> Enum.map(fn + "Public" -> full_uri + "as:Public" -> full_uri + x -> x + end) + + Map.put(map, field, new_fval) + else + map + end + + normalise_addressing_public_list(map, fields) + end + + defp normalise_addressing_public_list(map, _) do + map + end + + defp normalise_addressing_public(map) do + normalise_addressing_public_list(map, ["to", "cc", "bto", "bcc"]) + end + # if directMessage flag is set to true, leave the addressing alone def fix_explicit_addressing(%{"directMessage" => true} = object, _follower_collection), do: object @@ -96,6 +123,10 @@ def fix_explicit_addressing(%{"to" => to, "cc" => cc} = object, follower_collect |> Map.put("cc", final_cc) end + def fix_addressing_list_key(map, field) do + Map.put(map, field, fix_addressing_list(map[field])) + end + def fix_addressing(object) do {:ok, %User{follower_address: follower_collection}} = object @@ -103,10 +134,10 @@ def fix_addressing(object) do |> User.get_or_fetch_by_ap_id() object - |> fix_addressing_list("to") - |> fix_addressing_list("cc") - |> fix_addressing_list("bto") - |> fix_addressing_list("bcc") + |> fix_addressing_list_key("to") + |> fix_addressing_list_key("cc") + |> fix_addressing_list_key("bto") + |> fix_addressing_list_key("bcc") |> fix_explicit_addressing(follower_collection) |> CommonFixes.fix_implicit_addressing(follower_collection) end @@ -383,11 +414,28 @@ defp get_reported(objects) do end) end - def handle_incoming(data, options \\ []) + def handle_incoming(data, options \\ []) do + data = normalise_addressing_public(data) + + data = + if data["object"] != nil do + object = normalise_addressing_public(data["object"]) + Map.put(data, "object", object) + else + data + end + + handle_incoming_normalised(data, options) + end + + defp handle_incoming_normalised(data, options) # Flag objects are placed ahead of the ID check because Mastodon 2.8 and earlier send them # with nil ID. - def handle_incoming(%{"type" => "Flag", "object" => objects, "actor" => actor} = data, _options) do + defp handle_incoming_normalised( + %{"type" => "Flag", "object" => objects, "actor" => actor} = data, + _options + ) do with context <- data["context"] || Utils.generate_context_id(), content <- data["content"] || "", %User{} = actor <- User.get_cached_by_ap_id(actor), @@ -408,20 +456,21 @@ def handle_incoming(%{"type" => "Flag", "object" => objects, "actor" => actor} = end # disallow objects with bogus IDs - def handle_incoming(%{"id" => nil}, _options), do: :error - def handle_incoming(%{"id" => ""}, _options), do: :error + defp handle_incoming_normalised(%{"id" => nil}, _options), do: :error + defp handle_incoming_normalised(%{"id" => ""}, _options), do: :error # length of https:// = 8, should validate better, but good enough for now. - def handle_incoming(%{"id" => id}, _options) when is_binary(id) and byte_size(id) < 8, - do: :error + defp handle_incoming_normalised(%{"id" => id}, _options) + when is_binary(id) and byte_size(id) < 8, + do: :error - @doc "Rewrite misskey likes into EmojiReacts" - def handle_incoming( - %{ - "type" => "Like", - "content" => reaction - } = data, - options - ) do + # Rewrite misskey likes into EmojiReacts + defp handle_incoming_normalised( + %{ + "type" => "Like", + "content" => reaction + } = data, + options + ) do if Pleroma.Emoji.is_unicode_emoji?(reaction) || Pleroma.Emoji.matches_shortcode?(reaction) do data |> Map.put("type", "EmojiReact") @@ -433,11 +482,11 @@ def handle_incoming( end end - def handle_incoming( - %{"type" => "Create", "object" => %{"type" => objtype, "id" => obj_id}} = data, - options - ) - when objtype in ~w{Question Answer Audio Video Event Article Note Page} do + defp handle_incoming_normalised( + %{"type" => "Create", "object" => %{"type" => objtype, "id" => obj_id}} = data, + options + ) + when objtype in ~w{Question Answer Audio Video Event Article Note Page} do fetch_options = Keyword.put(options, :depth, (options[:depth] || 0) + 1) object = @@ -469,8 +518,8 @@ def handle_incoming( end end - def handle_incoming(%{"type" => type} = data, _options) - when type in ~w{Like EmojiReact Announce Add Remove} do + defp handle_incoming_normalised(%{"type" => type} = data, _options) + when type in ~w{Like EmojiReact Announce Add Remove} do with :ok <- ObjectValidator.fetch_actor_and_object(data), {:ok, activity, _meta} <- Pipeline.common_pipeline(data, local: false) do {:ok, activity} @@ -480,11 +529,11 @@ def handle_incoming(%{"type" => type} = data, _options) end end - def handle_incoming( - %{"type" => type} = data, - _options - ) - when type in ~w{Update Block Follow Accept Reject} do + defp handle_incoming_normalised( + %{"type" => type} = data, + _options + ) + when type in ~w{Update Block Follow Accept Reject} do with {:ok, %User{}} <- ObjectValidator.fetch_actor(data), {:ok, activity, _} <- Pipeline.common_pipeline(data, local: false) do @@ -492,10 +541,10 @@ def handle_incoming( end end - def handle_incoming( - %{"type" => "Delete"} = data, - _options - ) do + defp handle_incoming_normalised( + %{"type" => "Delete"} = data, + _options + ) do with {:ok, activity, _} <- Pipeline.common_pipeline(data, local: false) do {:ok, activity} @@ -515,15 +564,15 @@ def handle_incoming( end end - def handle_incoming( - %{ - "type" => "Undo", - "object" => %{"type" => "Follow", "object" => followed}, - "actor" => follower, - "id" => id - } = _data, - _options - ) do + defp handle_incoming_normalised( + %{ + "type" => "Undo", + "object" => %{"type" => "Follow", "object" => followed}, + "actor" => follower, + "id" => id + } = _data, + _options + ) do with %User{local: true} = followed <- User.get_cached_by_ap_id(followed), {:ok, %User{} = follower} <- User.get_or_fetch_by_ap_id(follower), {:ok, activity} <- ActivityPub.unfollow(follower, followed, id, false) do @@ -534,28 +583,28 @@ def handle_incoming( end end - def handle_incoming( - %{ - "type" => "Undo", - "object" => %{"type" => type} - } = data, - _options - ) - when type in ["Like", "EmojiReact", "Announce", "Block"] do + defp handle_incoming_normalised( + %{ + "type" => "Undo", + "object" => %{"type" => type} + } = data, + _options + ) + when type in ["Like", "EmojiReact", "Announce", "Block"] do with {:ok, activity, _} <- Pipeline.common_pipeline(data, local: false) do {:ok, activity} end end # For Undos that don't have the complete object attached, try to find it in our database. - def handle_incoming( - %{ - "type" => "Undo", - "object" => object - } = activity, - options - ) - when is_binary(object) do + defp handle_incoming_normalised( + %{ + "type" => "Undo", + "object" => object + } = activity, + options + ) + when is_binary(object) do with %Activity{data: data} <- Activity.get_by_ap_id(object) do activity |> Map.put("object", data) @@ -565,15 +614,15 @@ def handle_incoming( end end - def handle_incoming( - %{ - "type" => "Move", - "actor" => origin_actor, - "object" => origin_actor, - "target" => target_actor - }, - _options - ) do + defp handle_incoming_normalised( + %{ + "type" => "Move", + "actor" => origin_actor, + "object" => origin_actor, + "target" => target_actor + }, + _options + ) do with %User{} = origin_user <- User.get_cached_by_ap_id(origin_actor), # Use a dramatically shortened maximum age before refresh here because it is reasonable # for a user to @@ -588,7 +637,7 @@ def handle_incoming( end end - def handle_incoming(_, _), do: :error + defp handle_incoming_normalised(_, _), do: :error @spec get_obj_helper(String.t(), Keyword.t()) :: {:ok, Object.t()} | nil def get_obj_helper(id, options \\ []) do diff --git a/test/pleroma/web/federator_test.exs b/test/pleroma/web/federator_test.exs index 76a7a6d37..d3cc239cf 100644 --- a/test/pleroma/web/federator_test.exs +++ b/test/pleroma/web/federator_test.exs @@ -137,6 +137,37 @@ test "successfully processes incoming AP docs with correct origin" do assert {:error, :already_present} = ObanHelpers.perform(job) end + test "successfully normalises public scope descriptors" do + params = %{ + "@context" => "https://www.w3.org/ns/activitystreams", + "actor" => "http://mastodon.example.org/users/admin", + "type" => "Create", + "id" => "http://mastodon.example.org/users/admin/activities/1", + "object" => %{ + "type" => "Note", + "content" => "hi world!", + "id" => "http://mastodon.example.org/users/admin/objects/1", + "attributedTo" => "http://mastodon.example.org/users/admin", + "to" => ["Public"] + }, + "to" => ["as:Public"] + } + + assert {:ok, job} = Federator.incoming_ap_doc(params) + assert {:ok, activity} = ObanHelpers.perform(job) + assert activity.data["to"] == ["https://www.w3.org/ns/activitystreams#Public"] + + object = + from( + object in Pleroma.Object, + where: fragment("(?)->>'id' = ?", object.data, ^activity.data["object"]), + limit: 1 + ) + |> Repo.one() + + assert object.data["to"] == ["https://www.w3.org/ns/activitystreams#Public"] + end + test "rejects incoming AP docs with incorrect origin" do params = %{ "@context" => "https://www.w3.org/ns/activitystreams", From 24e608ab5b71fd92a0a02754c91d2c666c3cfef9 Mon Sep 17 00:00:00 2001 From: Oneric Date: Mon, 15 Apr 2024 23:28:00 +0200 Subject: [PATCH 2/8] docs: fix typo --- docs/docs/installation/optional/media_graphics_packages.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/docs/installation/optional/media_graphics_packages.md b/docs/docs/installation/optional/media_graphics_packages.md index d79ac2a07..922635fd0 100644 --- a/docs/docs/installation/optional/media_graphics_packages.md +++ b/docs/docs/installation/optional/media_graphics_packages.md @@ -14,7 +14,7 @@ Note: the packages are not required with the current default settings of Akkoma. `ImageMagick` is a set of tools to create, edit, compose, or convert bitmap images. It is required for the following Akkoma features: - * `Pleroma.Upload.Filters.Mogrify`, `Pleroma.Upload.Filters.Mogrifun` upload filters (related config: `Plaroma.Upload/filters` in `config/config.exs`) + * `Pleroma.Upload.Filters.Mogrify`, `Pleroma.Upload.Filters.Mogrifun` upload filters (related config: `Pleroma.Upload/filters` in `config/config.exs`) * Media preview proxy for still images (related config: `media_preview_proxy/enabled` in `config/config.exs`) ## `ffmpeg` @@ -29,5 +29,5 @@ It is required for the following Akkoma features: `exiftool` is media files metadata reader/writer. It is required for the following Akkoma features: - * `Pleroma.Upload.Filters.Exiftool.StripMetadata` upload filter (related config: `Plaroma.Upload/filters` in `config/config.exs`) - * `Pleroma.Upload.Filters.Exiftool.ReadDescription` upload filter (related config: `Plaroma.Upload/filters` in `config/config.exs`) + * `Pleroma.Upload.Filters.Exiftool.StripMetadata` upload filter (related config: `Pleroma.Upload/filters` in `config/config.exs`) + * `Pleroma.Upload.Filters.Exiftool.ReadDescription` upload filter (related config: `Pleroma.Upload/filters` in `config/config.exs`) From 163cb1d5e004cfc49e041ba0cd70b83b5a36f25d Mon Sep 17 00:00:00 2001 From: Oneric Date: Mon, 15 Apr 2024 23:32:10 +0200 Subject: [PATCH 3/8] exiftool: strip JXL and HEIC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As of exiftool 12.57 both formats are supported, but EXIF data is optional for JXL and if exiftool doesn’t find a preexisting metadata chunk it will create one and treat it as a minor error resulting in a non-zero exit code. Setting -ignoreMinorErrors avoids failing on such uploads. --- lib/pleroma/upload/filter/exiftool/strip_metadata.ex | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/pleroma/upload/filter/exiftool/strip_metadata.ex b/lib/pleroma/upload/filter/exiftool/strip_metadata.ex index 178b595f3..38a1cf7e2 100644 --- a/lib/pleroma/upload/filter/exiftool/strip_metadata.ex +++ b/lib/pleroma/upload/filter/exiftool/strip_metadata.ex @@ -12,14 +12,16 @@ defmodule Pleroma.Upload.Filter.Exiftool.StripMetadata do @spec filter(Pleroma.Upload.t()) :: {:ok, :noop} | {:ok, :filtered} | {:error, String.t()} # Formats not compatible with exiftool at this time - def filter(%Pleroma.Upload{content_type: "image/heic"}), do: {:ok, :noop} def filter(%Pleroma.Upload{content_type: "image/webp"}), do: {:ok, :noop} def filter(%Pleroma.Upload{content_type: "image/svg+xml"}), do: {:ok, :noop} - def filter(%Pleroma.Upload{content_type: "image/jxl"}), do: {:ok, :noop} def filter(%Pleroma.Upload{tempfile: file, content_type: "image" <> _}) do try do - case System.cmd("exiftool", ["-overwrite_original", "-gps:all=", file], parallelism: true) do + case System.cmd( + "exiftool", + ["-ignoreMinorErrors", "-overwrite_original", "-gps:all=", file], + parallelism: true + ) do {_response, 0} -> {:ok, :filtered} {error, 1} -> {:error, error} end From a95af3ee4c7f14f650ed77fe94580d5ea9df8901 Mon Sep 17 00:00:00 2001 From: Oneric Date: Mon, 15 Apr 2024 23:49:01 +0200 Subject: [PATCH 4/8] exiftool: strip all non-essential tags Documentation was already clear on this only stripping GPS tags. But there are more potentially sensitive metadata tags (e.g. author and possibly description) and the name alone suggests a broader effect. Thus change the filter to strip all metadata except for colourspace info and orientation (technically it strips everything and then readds selected tags). Explicitly stripping CommonIFD0 is needed since -all does not modify IFD0 due to TIFF storing some actual image data there. CommonIFD0 then strips a bunch of commonly used actual metadata tags from IFD0, to my understanding leaving TIFF image data and custom metadata tags intact. --- docs/docs/administration/CLI_tasks/instance.md | 2 +- docs/docs/configuration/cheatsheet.md | 2 +- lib/mix/tasks/pleroma/instance.ex | 4 ++-- .../upload/filter/exiftool/strip_metadata.ex | 14 ++++++++++++-- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/docs/docs/administration/CLI_tasks/instance.md b/docs/docs/administration/CLI_tasks/instance.md index 1a3f8153e..3d30d1119 100644 --- a/docs/docs/administration/CLI_tasks/instance.md +++ b/docs/docs/administration/CLI_tasks/instance.md @@ -37,7 +37,7 @@ If any of the options are left unspecified, you will be prompted interactively. - `--static-dir ` - the directory custom public files should be read from (custom emojis, frontend bundle overrides, robots.txt, etc.) - `--listen-ip ` - the ip the app should listen to, defaults to 127.0.0.1 - `--listen-port ` - the port the app should listen to, defaults to 4000 -- `--strip-uploads-metadata ` - use ExifTool to strip uploads of sensitive metadata +- `--strip-uploads-metadata ` - use ExifTool to strip uploads of metadata when possible - `--read-uploads-description ` - use ExifTool to read image descriptions from uploads - `--anonymize-uploads ` - randomize uploaded filenames - `--dedupe-uploads ` - store files based on their hash to reduce data storage requirements if duplicates are uploaded with different filenames diff --git a/docs/docs/configuration/cheatsheet.md b/docs/docs/configuration/cheatsheet.md index d2b699a51..4eee3f206 100644 --- a/docs/docs/configuration/cheatsheet.md +++ b/docs/docs/configuration/cheatsheet.md @@ -656,7 +656,7 @@ This filter replaces the declared filename (not the path) of an upload. #### Pleroma.Upload.Filter.Exiftool.StripMetadata -This filter only strips the GPS and location metadata with Exiftool leaving color profiles and attributes intact. +This filter strips metadata with Exiftool leaving color profiles and orientation intact. No specific configuration. diff --git a/lib/mix/tasks/pleroma/instance.ex b/lib/mix/tasks/pleroma/instance.ex index c02efabae..72f4623ce 100644 --- a/lib/mix/tasks/pleroma/instance.ex +++ b/lib/mix/tasks/pleroma/instance.ex @@ -172,10 +172,10 @@ def run(["gen" | rest]) do {strip_uploads_metadata_message, strip_uploads_metadata_default} = if Pleroma.Utils.command_available?("exiftool") do - {"Do you want to strip location (GPS) data from uploaded images? This requires exiftool, it was detected as installed. (y/n)", + {"Do you want to strip metadata from uploaded images? This requires exiftool, it was detected as installed. (y/n)", "y"} else - {"Do you want to strip location (GPS) data from uploaded images? This requires exiftool, it was detected as not installed, please install it if you answer yes. (y/n)", + {"Do you want to strip metadata from uploaded images? This requires exiftool, it was detected as not installed, please install it if you answer yes. (y/n)", "n"} end diff --git a/lib/pleroma/upload/filter/exiftool/strip_metadata.ex b/lib/pleroma/upload/filter/exiftool/strip_metadata.ex index 38a1cf7e2..9173b2a06 100644 --- a/lib/pleroma/upload/filter/exiftool/strip_metadata.ex +++ b/lib/pleroma/upload/filter/exiftool/strip_metadata.ex @@ -4,7 +4,7 @@ defmodule Pleroma.Upload.Filter.Exiftool.StripMetadata do @moduledoc """ - Strips GPS related EXIF tags and overwrites the file in place. + Tries to strip all image metadata but colorspace and orientation overwriting the file in place. Also strips or replaces filesystem metadata e.g., timestamps. """ @behaviour Pleroma.Upload.Filter @@ -19,7 +19,17 @@ def filter(%Pleroma.Upload{tempfile: file, content_type: "image" <> _}) do try do case System.cmd( "exiftool", - ["-ignoreMinorErrors", "-overwrite_original", "-gps:all=", file], + [ + "-ignoreMinorErrors", + "-overwrite_original", + "-all=", + "-CommonIFD0=", + "-TagsFromFile", + "@", + "-ColorSpaceTags", + "-Orientation", + file + ], parallelism: true ) do {_response, 0} -> {:ok, :filtered} From 72c2d9f009db61da4f73607ab57b84f83b797c0c Mon Sep 17 00:00:00 2001 From: Norm Date: Fri, 26 Apr 2024 01:43:44 -0400 Subject: [PATCH 5/8] Change nginx cache size to 1 GiB The current 10 GiB cache size is too large to fit into tmpfs for VMs and other machines with smaller RAM sizes. Most non-Debian distros mount /tmp on tmpfs. --- installation/nginx/akkoma.nginx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installation/nginx/akkoma.nginx b/installation/nginx/akkoma.nginx index 5b7162d1e..bfb1fffb3 100644 --- a/installation/nginx/akkoma.nginx +++ b/installation/nginx/akkoma.nginx @@ -3,7 +3,7 @@ # See the documentation at docs.akkoma.dev for your particular distro/OS for # installation instructions. -proxy_cache_path /tmp/akkoma-media-cache levels=1:2 keys_zone=akkoma_media_cache:10m max_size=10g +proxy_cache_path /tmp/akkoma-media-cache levels=1:2 keys_zone=akkoma_media_cache:10m max_size=1g inactive=720m use_temp_path=off; # this is explicitly IPv4 since Pleroma.Web.Endpoint binds on IPv4 only From 5b320616ca6cf7288784066fe228fd7d25b9c09c Mon Sep 17 00:00:00 2001 From: Norm Date: Fri, 26 Apr 2024 02:33:18 -0400 Subject: [PATCH 6/8] Remove unused top level files I don't think anyone really uses the tools that uses these files these days, and they are another thing that needs to be updated every so often. --- Procfile | 2 -- coveralls.json | 7 ------- elixir_buildpack.config | 2 -- 3 files changed, 11 deletions(-) delete mode 100644 Procfile delete mode 100644 coveralls.json delete mode 100644 elixir_buildpack.config diff --git a/Procfile b/Procfile deleted file mode 100644 index 7ac187baa..000000000 --- a/Procfile +++ /dev/null @@ -1,2 +0,0 @@ -web: mix phx.server -release: mix ecto.migrate diff --git a/coveralls.json b/coveralls.json deleted file mode 100644 index 8652591ef..000000000 --- a/coveralls.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "skip_files": [ - "test/support", - "lib/mix/tasks/pleroma/benchmark.ex", - "lib/credo/check/consistency/file_location.ex" - ] -} \ No newline at end of file diff --git a/elixir_buildpack.config b/elixir_buildpack.config deleted file mode 100644 index ee9e051a6..000000000 --- a/elixir_buildpack.config +++ /dev/null @@ -1,2 +0,0 @@ -elixir_version=1.14.3 -erlang_version=25.3 From 5ee0fb18cb28ef9917a3b1769abb8877ed317e87 Mon Sep 17 00:00:00 2001 From: Oneric Date: Thu, 18 Apr 2024 18:05:29 +0000 Subject: [PATCH 7/8] exiftool: make stripped tags configurable --- CHANGELOG.md | 4 +- config/description.exs | 20 +++ docs/docs/configuration/cheatsheet.md | 3 +- .../upload/filter/exiftool/strip_metadata.ex | 36 +++--- .../filter/exiftool/strip_location_test.exs | 116 +++++++++++++++++- 5 files changed, 157 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 723c1060f..578d02964 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## Changed - Inbound pipeline error handing was modified somewhat, which should lead to less incomprehensible log spam. Hopefully. -- Uploadfilter `Pleroma.Upload.Filter.Exiftool` has been renamed to `Pleroma.Upload.Filter.Exiftool.StripMetadata` +- Uploadfilter `Pleroma.Upload.Filter.Exiftool` was replaced by `Pleroma.Upload.Filter.Exiftool.StripMetadata`; + the latter strips all non-essential metadata by default but can be configured. + To regain the old behaviour of only stripping GPS data set `purge: ["gps:all"]`. ## Fixed - Issue preventing fetching anything from IPv6-only instances diff --git a/config/description.exs b/config/description.exs index ec5050be6..d9c90edd7 100644 --- a/config/description.exs +++ b/config/description.exs @@ -222,6 +222,26 @@ } ] }, + %{ + group: :pleroma, + key: Pleroma.Upload.Filter.Exiftool.StripMetadata, + type: :group, + description: "Strip specified metadata from image uploads", + children: [ + %{ + key: :purge, + description: "Metadata fields or groups to strip", + type: {:list, :string}, + suggestions: ["all", "CommonIFD0"] + }, + %{ + key: :preserve, + description: "Metadata fields or groups to preserve (takes precedence over stripping)", + type: {:list, :string}, + suggestions: ["ColorSpaces", "Orientation"] + } + ] + }, %{ group: :pleroma, key: Pleroma.Emails.Mailer, diff --git a/docs/docs/configuration/cheatsheet.md b/docs/docs/configuration/cheatsheet.md index 4eee3f206..3b5dfa41e 100644 --- a/docs/docs/configuration/cheatsheet.md +++ b/docs/docs/configuration/cheatsheet.md @@ -658,7 +658,8 @@ This filter replaces the declared filename (not the path) of an upload. This filter strips metadata with Exiftool leaving color profiles and orientation intact. -No specific configuration. +* `purge`: List of Exiftool tag names or tag group names to purge +* `preserve`: List of Exiftool tag names or tag group names to preserve even if they occur in the purge list #### Pleroma.Upload.Filter.Exiftool.ReadDescription diff --git a/lib/pleroma/upload/filter/exiftool/strip_metadata.ex b/lib/pleroma/upload/filter/exiftool/strip_metadata.ex index 9173b2a06..912ff6a92 100644 --- a/lib/pleroma/upload/filter/exiftool/strip_metadata.ex +++ b/lib/pleroma/upload/filter/exiftool/strip_metadata.ex @@ -9,6 +9,11 @@ defmodule Pleroma.Upload.Filter.Exiftool.StripMetadata do """ @behaviour Pleroma.Upload.Filter + alias Pleroma.Config + + @purge_default ["all", "CommonIFD0"] + @preserve_default ["ColorSpaceTags", "Orientation"] + @spec filter(Pleroma.Upload.t()) :: {:ok, :noop} | {:ok, :filtered} | {:error, String.t()} # Formats not compatible with exiftool at this time @@ -16,22 +21,23 @@ def filter(%Pleroma.Upload{content_type: "image/webp"}), do: {:ok, :noop} def filter(%Pleroma.Upload{content_type: "image/svg+xml"}), do: {:ok, :noop} def filter(%Pleroma.Upload{tempfile: file, content_type: "image" <> _}) do + purge_args = + Config.get([__MODULE__, :purge], @purge_default) + |> Enum.map(fn mgroup -> "-" <> mgroup <> "=" end) + + preserve_args = + Config.get([__MODULE__, :preserve], @preserve_default) + |> Enum.map(fn mgroup -> "-" <> mgroup end) + |> then(fn + # If -TagsFromFile is not followed by tag selectors, it will copy most available tags + [] -> [] + args -> ["-TagsFromFile", "@" | args] + end) + + args = ["-ignoreMinorErrors", "-overwrite_original" | purge_args] ++ preserve_args ++ [file] + try do - case System.cmd( - "exiftool", - [ - "-ignoreMinorErrors", - "-overwrite_original", - "-all=", - "-CommonIFD0=", - "-TagsFromFile", - "@", - "-ColorSpaceTags", - "-Orientation", - file - ], - parallelism: true - ) do + case System.cmd("exiftool", args, parallelism: true) do {_response, 0} -> {:ok, :filtered} {error, 1} -> {:error, error} end diff --git a/test/pleroma/upload/filter/exiftool/strip_location_test.exs b/test/pleroma/upload/filter/exiftool/strip_location_test.exs index 6f8178115..2e017cd7e 100644 --- a/test/pleroma/upload/filter/exiftool/strip_location_test.exs +++ b/test/pleroma/upload/filter/exiftool/strip_location_test.exs @@ -6,29 +6,104 @@ defmodule Pleroma.Upload.Filter.Exiftool.StripMetadataTest do use Pleroma.DataCase alias Pleroma.Upload.Filter - test "apply exiftool filter" do + @tag :tmp_dir + test "exiftool strip metadata strips GPS etc but preserves Orientation and ColorSpace by default", + %{tmp_dir: tmp_dir} do assert Pleroma.Utils.command_available?("exiftool") + tmpfile = Path.join(tmp_dir, "tmp.jpg") + File.cp!( "test/fixtures/DSCN0010.jpg", - "test/fixtures/DSCN0010_tmp.jpg" + tmpfile ) upload = %Pleroma.Upload{ name: "image_with_GPS_data.jpg", content_type: "image/jpeg", path: Path.absname("test/fixtures/DSCN0010.jpg"), - tempfile: Path.absname("test/fixtures/DSCN0010_tmp.jpg") + tempfile: Path.absname(tmpfile) } assert Filter.Exiftool.StripMetadata.filter(upload) == {:ok, :filtered} - {exif_original, 0} = System.cmd("exiftool", ["test/fixtures/DSCN0010.jpg"]) - {exif_filtered, 0} = System.cmd("exiftool", ["test/fixtures/DSCN0010_tmp.jpg"]) + exif_original = read_exif("test/fixtures/DSCN0010.jpg") + exif_filtered = read_exif(tmpfile) refute exif_original == exif_filtered assert String.match?(exif_original, ~r/GPS/) refute String.match?(exif_filtered, ~r/GPS/) + assert String.match?(exif_original, ~r/Camera Model Name/) + refute String.match?(exif_filtered, ~r/Camera Model Name/) + assert String.match?(exif_original, ~r/Orientation/) + assert String.match?(exif_filtered, ~r/Orientation/) + assert String.match?(exif_original, ~r/Color Space/) + assert String.match?(exif_filtered, ~r/Color Space/) + end + + # this is a nonsensical configuration, but it shouldn't explode + @tag :tmp_dir + test "exiftool strip metadata is a noop with empty purge list", %{tmp_dir: tmp_dir} do + assert Pleroma.Utils.command_available?("exiftool") + clear_config([Pleroma.Upload.Filter.Exiftool.StripMetadata, :purge], []) + + tmpfile = Path.join(tmp_dir, "tmp.jpg") + + File.cp!( + "test/fixtures/DSCN0010.jpg", + tmpfile + ) + + upload = %Pleroma.Upload{ + name: "image_with_GPS_data.jpg", + content_type: "image/jpeg", + path: Path.absname("test/fixtures/DSCN0010.jpg"), + tempfile: Path.absname(tmpfile) + } + + assert Filter.Exiftool.StripMetadata.filter(upload) == {:ok, :filtered} + + exif_original = read_exif("test/fixtures/DSCN0010.jpg") + exif_filtered = read_exif(tmpfile) + + assert exif_original == exif_filtered + end + + @tag :tmp_dir + test "exiftool strip metadata works with empty preserve list", %{tmp_dir: tmp_dir} do + assert Pleroma.Utils.command_available?("exiftool") + clear_config([Pleroma.Upload.Filter.Exiftool.StripMetadata, :preserve], []) + + tmpfile = Path.join(tmp_dir, "tmp.jpg") + + File.cp!( + "test/fixtures/DSCN0010.jpg", + tmpfile + ) + + upload = %Pleroma.Upload{ + name: "image_with_GPS_data.jpg", + content_type: "image/jpeg", + path: Path.absname("test/fixtures/DSCN0010.jpg"), + tempfile: Path.absname(tmpfile) + } + + write_exif(["-ImageDescription=Trees and Houses", "-Orientation=1", tmpfile]) + exif_extended = read_exif(tmpfile) + assert String.match?(exif_extended, ~r/Image Description[ \t]*:[ \t]*Trees and Houses/) + assert String.match?(exif_extended, ~r/Orientation/) + + assert Filter.Exiftool.StripMetadata.filter(upload) == {:ok, :filtered} + + exif_original = read_exif("test/fixtures/DSCN0010.jpg") + exif_filtered = read_exif(tmpfile) + + refute exif_original == exif_filtered + refute exif_extended == exif_filtered + assert String.match?(exif_original, ~r/GPS/) + refute String.match?(exif_filtered, ~r/GPS/) + refute String.match?(exif_filtered, ~r/Image Description/) + refute String.match?(exif_filtered, ~r/Orientation/) end test "verify webp files are skipped" do @@ -39,4 +114,35 @@ test "verify webp files are skipped" do assert Filter.Exiftool.StripMetadata.filter(upload) == {:ok, :noop} end + + test "verify svg files are skipped" do + upload = %Pleroma.Upload{ + name: "sample.svg", + content_type: "image/svg+xml" + } + + assert Filter.Exiftool.StripMetadata.filter(upload) == {:ok, :noop} + end + + defp read_exif(file) do + # time and file path tags cause mismatches even for byte-identical files + {exif_data, 0} = + System.cmd("exiftool", [ + "-x", + "Time:All", + "-x", + "Directory", + "-x", + "FileName", + "-x", + "FileSize", + file + ]) + + exif_data + end + + defp write_exif(args) do + {_response, 0} = System.cmd("exiftool", ["-ignoreMinorErrors", "-overwrite_original" | args]) + end end From 5bc64c57534736eaa3da910916d2e5fdf6c385d9 Mon Sep 17 00:00:00 2001 From: Oneric Date: Fri, 26 Apr 2024 00:01:13 +0200 Subject: [PATCH 8/8] changelog: add note about StripMetadata and ReadDescription order --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 578d02964..49ee8f6f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Support for [FEP-fffd](https://codeberg.org/fediverse/fep/src/branch/main/fep/fffd/fep-fffd.md) (proxy objects) - Verified support for elixir 1.16 - Uploadfilter `Pleroma.Upload.Filter.Exiftool.ReadDescription` returns description values to the FE so they can pre fill the image description field + NOTE: this filter MUST be placed before `Exiftool.StripMetadata` to work ## Changed - Inbound pipeline error handing was modified somewhat, which should lead to less incomprehensible log spam. Hopefully.