Exiftool: Strip all non-essential metadata tags #745

Merged
floatingghost merged 5 commits from Oneric/akkoma:exiftool-strip-all into develop 2024-04-26 17:38:47 +00:00
8 changed files with 166 additions and 18 deletions

View file

@ -10,10 +10,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- Support for [FEP-fffd](https://codeberg.org/fediverse/fep/src/branch/main/fep/fffd/fep-fffd.md) (proxy objects)
- Verified support for elixir 1.16
- Uploadfilter `Pleroma.Upload.Filter.Exiftool.ReadDescription` returns description values to the FE so they can pre-fill the image description field
NOTE: this filter MUST be placed before `Exiftool.StripMetadata` to work
## Changed
- Inbound pipeline error handling was modified somewhat, which should lead to less incomprehensible log spam. Hopefully.
- Uploadfilter `Pleroma.Upload.Filter.Exiftool` has been renamed to `Pleroma.Upload.Filter.Exiftool.StripMetadata`
- Uploadfilter `Pleroma.Upload.Filter.Exiftool` was replaced by `Pleroma.Upload.Filter.Exiftool.StripMetadata`;
the latter strips all non-essential metadata by default but can be configured.
To regain the old behaviour of only stripping GPS data set `purge: ["gps:all"]`.
## Fixed
- Issue preventing fetching anything from IPv6-only instances

View file

@ -222,6 +222,26 @@
}
]
},
%{
group: :pleroma,
key: Pleroma.Upload.Filter.Exiftool.StripMetadata,
type: :group,
description: "Strip specified metadata from image uploads",
children: [
%{
key: :purge,
description: "Metadata fields or groups to strip",
type: {:list, :string},
suggestions: ["all", "CommonIFD0"]
},
%{
  key: :preserve,
  description: "Metadata fields or groups to preserve (takes precedence over stripping)",
  type: {:list, :string},
  # Suggestions must be real ExifTool tag/shortcut names: "ColorSpaceTags" is the
  # shortcut the filter itself uses as its default, "ColorSpaces" is not one.
  suggestions: ["ColorSpaceTags", "Orientation"]
}
]
},
%{
group: :pleroma,
key: Pleroma.Emails.Mailer,

View file

@ -37,7 +37,7 @@ If any of the options are left unspecified, you will be prompted interactively.
- `--static-dir <path>` - the directory custom public files should be read from (custom emojis, frontend bundle overrides, robots.txt, etc.)
- `--listen-ip <ip>` - the ip the app should listen to, defaults to 127.0.0.1
- `--listen-port <port>` - the port the app should listen to, defaults to 4000
- `--strip-uploads-metadata <Y|N>` - use ExifTool to strip uploads of sensitive metadata
- `--strip-uploads-metadata <Y|N>` - use ExifTool to strip uploads of metadata when possible
- `--read-uploads-description <Y|N>` - use ExifTool to read image descriptions from uploads
- `--anonymize-uploads <Y|N>` - randomize uploaded filenames
- `--dedupe-uploads <Y|N>` - store files based on their hash to reduce data storage requirements if duplicates are uploaded with different filenames

View file

@ -656,9 +656,10 @@ This filter replaces the declared filename (not the path) of an upload.
#### Pleroma.Upload.Filter.Exiftool.StripMetadata
This filter only strips the GPS and location metadata with Exiftool leaving color profiles and attributes intact.
This filter strips metadata with Exiftool leaving color profiles and orientation intact.
No specific configuration.
* `purge`: List of Exiftool tag names or tag group names to purge
* `preserve`: List of Exiftool tag names or tag group names to preserve even if they occur in the purge list
#### Pleroma.Upload.Filter.Exiftool.ReadDescription

View file

@ -14,7 +14,7 @@ Note: the packages are not required with the current default settings of Akkoma.
`ImageMagick` is a set of tools to create, edit, compose, or convert bitmap images.
It is required for the following Akkoma features:
* `Pleroma.Upload.Filters.Mogrify`, `Pleroma.Upload.Filters.Mogrifun` upload filters (related config: `Plaroma.Upload/filters` in `config/config.exs`)
* `Pleroma.Upload.Filter.Mogrify`, `Pleroma.Upload.Filter.Mogrifun` upload filters (related config: `Pleroma.Upload/filters` in `config/config.exs`)
Review

hehe plaroma

hehe plaroma
* Media preview proxy for still images (related config: `media_preview_proxy/enabled` in `config/config.exs`)
## `ffmpeg`
@ -29,5 +29,5 @@ It is required for the following Akkoma features:
`exiftool` is media files metadata reader/writer.
It is required for the following Akkoma features:
* `Pleroma.Upload.Filters.Exiftool.StripMetadata` upload filter (related config: `Plaroma.Upload/filters` in `config/config.exs`)
* `Pleroma.Upload.Filters.Exiftool.ReadDescription` upload filter (related config: `Plaroma.Upload/filters` in `config/config.exs`)
* `Pleroma.Upload.Filter.Exiftool.StripMetadata` upload filter (related config: `Pleroma.Upload/filters` in `config/config.exs`)
* `Pleroma.Upload.Filter.Exiftool.ReadDescription` upload filter (related config: `Pleroma.Upload/filters` in `config/config.exs`)

View file

@ -172,10 +172,10 @@ def run(["gen" | rest]) do
{strip_uploads_metadata_message, strip_uploads_metadata_default} =
if Pleroma.Utils.command_available?("exiftool") do
{"Do you want to strip location (GPS) data from uploaded images? This requires exiftool, it was detected as installed. (y/n)",
{"Do you want to strip metadata from uploaded images? This requires exiftool, it was detected as installed. (y/n)",
"y"}
else
{"Do you want to strip location (GPS) data from uploaded images? This requires exiftool, it was detected as not installed, please install it if you answer yes. (y/n)",
{"Do you want to strip metadata from uploaded images? This requires exiftool, it was detected as not installed, please install it if you answer yes. (y/n)",
"n"}
end

View file

@ -4,22 +4,40 @@
defmodule Pleroma.Upload.Filter.Exiftool.StripMetadata do
@moduledoc """
Strips GPS related EXIF tags and overwrites the file in place.
Tries to strip all image metadata except colorspace and orientation, overwriting the file in place.
Also strips or replaces filesystem metadata e.g., timestamps.
"""
@behaviour Pleroma.Upload.Filter
alias Pleroma.Config
@purge_default ["all", "CommonIFD0"]
@preserve_default ["ColorSpaceTags", "Orientation"]
@spec filter(Pleroma.Upload.t()) :: {:ok, :noop} | {:ok, :filtered} | {:error, String.t()}
# Formats not compatible with exiftool at this time: report a noop without touching the file
def filter(%Pleroma.Upload{content_type: content_type})
    when content_type in ["image/heic", "image/webp", "image/svg+xml", "image/jxl"],
    do: {:ok, :noop}
def filter(%Pleroma.Upload{tempfile: file, content_type: "image" <> _}) do
purge_args =
Config.get([__MODULE__, :purge], @purge_default)
|> Enum.map(fn mgroup -> "-" <> mgroup <> "=" end)
preserve_args =
Config.get([__MODULE__, :preserve], @preserve_default)
|> Enum.map(fn mgroup -> "-" <> mgroup end)
|> then(fn
# If -TagsFromFile is not followed by tag selectors, it will copy most available tags
[] -> []
args -> ["-TagsFromFile", "@" | args]
end)
args = ["-ignoreMinorErrors", "-overwrite_original" | purge_args] ++ preserve_args ++ [file]
try do
case System.cmd("exiftool", ["-overwrite_original", "-gps:all=", file], parallelism: true) do
case System.cmd("exiftool", args, parallelism: true) do
{_response, 0} -> {:ok, :filtered}
{error, 1} -> {:error, error}
end

View file

@ -6,29 +6,104 @@ defmodule Pleroma.Upload.Filter.Exiftool.StripMetadataTest do
use Pleroma.DataCase
alias Pleroma.Upload.Filter
test "apply exiftool filter" do
@tag :tmp_dir
test "exiftool strip metadata strips GPS etc but preserves Orientation and ColorSpace by default",
%{tmp_dir: tmp_dir} do
assert Pleroma.Utils.command_available?("exiftool")
tmpfile = Path.join(tmp_dir, "tmp.jpg")
File.cp!(
"test/fixtures/DSCN0010.jpg",
"test/fixtures/DSCN0010_tmp.jpg"
tmpfile
)
upload = %Pleroma.Upload{
name: "image_with_GPS_data.jpg",
content_type: "image/jpeg",
path: Path.absname("test/fixtures/DSCN0010.jpg"),
tempfile: Path.absname("test/fixtures/DSCN0010_tmp.jpg")
tempfile: Path.absname(tmpfile)
}
assert Filter.Exiftool.StripMetadata.filter(upload) == {:ok, :filtered}
{exif_original, 0} = System.cmd("exiftool", ["test/fixtures/DSCN0010.jpg"])
{exif_filtered, 0} = System.cmd("exiftool", ["test/fixtures/DSCN0010_tmp.jpg"])
exif_original = read_exif("test/fixtures/DSCN0010.jpg")
exif_filtered = read_exif(tmpfile)
refute exif_original == exif_filtered
assert String.match?(exif_original, ~r/GPS/)
refute String.match?(exif_filtered, ~r/GPS/)
assert String.match?(exif_original, ~r/Camera Model Name/)
refute String.match?(exif_filtered, ~r/Camera Model Name/)
assert String.match?(exif_original, ~r/Orientation/)
assert String.match?(exif_filtered, ~r/Orientation/)
assert String.match?(exif_original, ~r/Color Space/)
assert String.match?(exif_filtered, ~r/Color Space/)
end
# this is a nonsensical configuration, but it shouldn't explode
Review

good call, we all know the first thing someone will do is supply a nonsensical config :hehe:

good call, we all know the first thing someone will do is supply a nonsensical config :hehe:
@tag :tmp_dir
test "exiftool strip metadata is a noop with empty purge list", %{tmp_dir: tmp_dir} do
  assert Pleroma.Utils.command_available?("exiftool")
  # An empty purge list leaves nothing to strip; the filter must still succeed.
  clear_config([Pleroma.Upload.Filter.Exiftool.StripMetadata, :purge], [])

  fixture = "test/fixtures/DSCN0010.jpg"
  working_copy = Path.join(tmp_dir, "tmp.jpg")
  # Work on a copy inside the per-test tmp dir so the fixture itself is never modified.
  File.cp!(fixture, working_copy)

  upload = %Pleroma.Upload{
    name: "image_with_GPS_data.jpg",
    content_type: "image/jpeg",
    path: Path.absname(fixture),
    tempfile: Path.absname(working_copy)
  }

  assert {:ok, :filtered} == Filter.Exiftool.StripMetadata.filter(upload)

  # Metadata of the processed copy must be indistinguishable from the fixture's.
  assert read_exif(fixture) == read_exif(working_copy)
end
@tag :tmp_dir
test "exiftool strip metadata works with empty preserve list", %{tmp_dir: tmp_dir} do
  assert Pleroma.Utils.command_available?("exiftool")
  clear_config([Pleroma.Upload.Filter.Exiftool.StripMetadata, :preserve], [])

  fixture = "test/fixtures/DSCN0010.jpg"
  working_copy = Path.join(tmp_dir, "tmp.jpg")
  # Work on a copy inside the per-test tmp dir so the fixture itself is never modified.
  File.cp!(fixture, working_copy)

  upload = %Pleroma.Upload{
    name: "image_with_GPS_data.jpg",
    content_type: "image/jpeg",
    path: Path.absname(fixture),
    tempfile: Path.absname(working_copy)
  }

  # Seed tags first and confirm they are present, so their absence after
  # filtering can only be the filter's doing.
  write_exif(["-ImageDescription=Trees and Houses", "-Orientation=1", working_copy])
  exif_seeded = read_exif(working_copy)
  assert exif_seeded =~ ~r/Image Description[ \t]*:[ \t]*Trees and Houses/
  assert exif_seeded =~ ~r/Orientation/

  assert {:ok, :filtered} == Filter.Exiftool.StripMetadata.filter(upload)

  exif_source = read_exif(fixture)
  exif_stripped = read_exif(working_copy)

  refute exif_source == exif_stripped
  refute exif_seeded == exif_stripped

  assert exif_source =~ ~r/GPS/
  refute exif_stripped =~ ~r/GPS/
  # With nothing preserved, even Orientation has to go.
  refute exif_stripped =~ ~r/Image Description/
  refute exif_stripped =~ ~r/Orientation/
end
test "verify webp files are skipped" do
@ -39,4 +114,35 @@ test "verify webp files are skipped" do
assert Filter.Exiftool.StripMetadata.filter(upload) == {:ok, :noop}
end
test "verify svg files are skipped" do
  svg_upload = %Pleroma.Upload{name: "sample.svg", content_type: "image/svg+xml"}

  # SVG uploads are expected to be passed through as a noop.
  assert {:ok, :noop} == Filter.Exiftool.StripMetadata.filter(svg_upload)
end
# Returns exiftool's textual metadata listing for `file`, with volatile tags excluded.
# Crashes (match error) if exiftool exits with a non-zero status.
defp read_exif(file) do
  # time and file path tags cause mismatches even for byte-identical files
  excluded_tags = ["Time:All", "Directory", "FileName", "FileSize"]
  exclude_args = Enum.flat_map(excluded_tags, fn tag -> ["-x", tag] end)

  {output, 0} = System.cmd("exiftool", exclude_args ++ [file])
  output
end
# Invokes exiftool with `args` (tag assignments followed by the target file),
# overwriting the target in place. Crashes (match error) on a non-zero exit status.
defp write_exif(args) do
  exiftool_args = ["-ignoreMinorErrors", "-overwrite_original"] ++ args
  {_output, 0} = System.cmd("exiftool", exiftool_args)
end
end