Fix emoji qualification (#124)
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful

Reviewed-on: #124
This commit is contained in:
floatingghost 2022-07-28 12:02:36 +00:00
parent 52095ff4de
commit 405406601f
7 changed files with 120 additions and 4 deletions

View file

@ -9,6 +9,7 @@ defmodule Pleroma.Emoji do
"""
use GenServer
alias Pleroma.Emoji.Combinations
alias Pleroma.Emoji.Loader
require Logger
@ -124,7 +125,7 @@ defp update_emojis(emojis) do
|> String.split("\n")
|> Enum.filter(fn line ->
line != "" and not String.starts_with?(line, "#") and
String.contains?(line, "qualified")
String.contains?(line, "fully-qualified")
end)
|> Enum.map(fn line ->
line
@ -186,4 +187,17 @@ def emoji_url(%{"type" => "EmojiReact", "content" => emoji, "tag" => tags}) do
end
def emoji_url(_), do: nil
# Compile-time table: each emoji in `emojis` that contains U+FE0F is paired
# with the variants returned by Combinations.variate_emoji_qualification/1.
fe0f_emojis = Enum.filter(emojis, fn emoji -> String.contains?(emoji, "\uFE0F") end)
qualification_table = Combinations.variate_emoji_qualification(fe0f_emojis)

# One literal-matching clause is generated per variant, each returning the
# emoji the variant was derived from.
for {qualified, variants} <- qualification_table, variant <- variants do
  def fully_qualify_emoji(unquote(variant)), do: unquote(qualified)
end

# Anything that is not a known variant is returned unchanged.
def fully_qualify_emoji(emoji), do: emoji
end

View file

@ -0,0 +1,45 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Emoji.Combinations do
  @moduledoc """
  Generates the qualification variants of emoji.

  U+FE0F (variation selector-16) is used for fully-qualifying emoji, and that
  includes emoji combinations. For each U+FE0F in an emoji, all possible
  combinations of the character being removed or staying are generated.
  This is made as an attempt to find all partially-qualified and unqualified
  versions of a fully-qualified emoji.
  """

  # Original author's note: I have found *no cases* for which this would be a
  # problem, after browsing the entire emoji list in emoji-test.txt. This is
  # safe, and, sadly, most likely sane too.

  @variation_selector "\uFE0F"

  @doc """
  Returns every qualification variant of an emoji.

  Given a binary, returns the list of strings obtained by independently
  keeping or dropping each U+FE0F codepoint. The input itself is always one of
  the results, and an input with `k` occurrences of U+FE0F yields `2^k`
  results. For each U+FE0F the "dropped" variants come before the "kept" ones.

  Given a list of binaries, returns a list of `{emoji, variants}` tuples in
  the same order as the input.

  ## Examples

      iex> Pleroma.Emoji.Combinations.variate_emoji_qualification("a\\uFE0Fb")
      ["ab", "a\\uFE0Fb"]

  """
  @spec variate_emoji_qualification(String.t()) :: [String.t()]
  @spec variate_emoji_qualification([String.t()]) :: [{String.t(), [String.t()]}]
  def variate_emoji_qualification(emoji) when is_binary(emoji) do
    emoji
    |> String.codepoints()
    |> qualification_combinations()
    |> Enum.map(&List.to_string/1)
  end

  def variate_emoji_qualification(emoji) when is_list(emoji) do
    Enum.map(emoji, fn single -> {single, variate_emoji_qualification(single)} end)
  end

  # Expands a codepoint list into all keep/drop combinations of U+FE0F.
  # Variants are accumulated in reverse (prepend is O(1), append is O(n)) and
  # each one is reversed exactly once at the end.
  defp qualification_combinations(codepoints) do
    [[]]
    |> qualification_combinations(codepoints)
    |> Enum.map(&Enum.reverse/1)
  end

  defp qualification_combinations(acc, []), do: acc

  # A variation selector doubles the variants: one without it, one with it.
  defp qualification_combinations(acc, [@variation_selector | tail]) do
    acc
    |> Enum.flat_map(fn variant -> [variant, [@variation_selector | variant]] end)
    |> qualification_combinations(tail)
  end

  # Any other codepoint is added to every variant built so far.
  defp qualification_combinations(acc, [codepoint | tail]) do
    acc
    |> Enum.map(fn variant -> [codepoint | variant] end)
    |> qualification_combinations(tail)
  end
end

View file

@ -114,6 +114,7 @@ def call(conn = %{method: method}, url, opts) when method in @methods do
else
{:ok, true} ->
conn
|> put_private(:proxied_url, url)
|> error_or_redirect(500, "Request failed", opts)
|> halt()

View file

@ -37,6 +37,13 @@ defp to_es({:filter, [field, query]}) do
end
# Builds the list of query clauses for `q`: an `exists` clause on the
# "content" field, followed by each element of `q` translated with to_es/1.
# `q` is mapped exactly once; a second, discarded `Enum.map(q, &to_es/1)`
# would only repeat the same work.
# NOTE(review): assumes to_es/1 has no side effects — confirm.
def parse(q) do
  content_exists = %{
    exists: %{
      field: "content"
    }
  }

  [content_exists | Enum.map(q, &to_es/1)]
end
end

View file

@ -53,6 +53,7 @@ def changeset(struct, data) do
defp fix(data) do
data =
data
|> fix_emoji_qualification()
|> CommonFixes.fix_actor()
|> CommonFixes.fix_activity_addressing()
@ -77,6 +78,23 @@ defp fix(data) do
# nil never matches.
defp matches_shortcode?(nil), do: false

# Any other value matches when @emoji_regex matches it.
defp matches_shortcode?(candidate) do
  Regex.match?(@emoji_regex, candidate)
end
# Replaces "content" with the result of Pleroma.Emoji.fully_qualify_emoji/1,
# but only when the original content is not accepted by
# Pleroma.Emoji.is_unicode_emoji?/1 and the replacement is. In every other
# case the data is returned untouched.
defp fix_emoji_qualification(%{"content" => emoji} = data) do
  qualified = Pleroma.Emoji.fully_qualify_emoji(emoji)

  # Short-circuits exactly like the original two-step check: the replacement
  # is only inspected when the original content was rejected.
  needs_rewrite? =
    !Pleroma.Emoji.is_unicode_emoji?(emoji) && Pleroma.Emoji.is_unicode_emoji?(qualified)

  if needs_rewrite? do
    Map.put(data, "content", qualified)
  else
    data
  end
end

# Data without a "content" key passes through unchanged.
defp fix_emoji_qualification(data), do: data
defp validate_emoji(cng) do
content = get_field(cng, :content)

View file

@ -13,8 +13,8 @@ test "tells if a string is an unicode emoji" do
# Accept fully-qualified and unqualified emoji
# See http://www.unicode.org/reports/tr51/
assert Emoji.is_unicode_emoji?("")
assert Emoji.is_unicode_emoji?("")
refute Emoji.is_unicode_emoji?("")
refute Emoji.is_unicode_emoji?("")
assert Emoji.is_unicode_emoji?("🥺")
assert Emoji.is_unicode_emoji?("🤰")

View file

@ -86,6 +86,37 @@ test "it works for incoming custom emoji reactions" do
)
end
test "it works for incoming unqualified emoji reactions" do
  local_user = insert(:user)
  remote_user = insert(:user, local: false)

  {:ok, post} = CommonAPI.post(local_user, %{status: "hello"})
  object_id = post.data["object"]

  # woman detective emoji, unqualified
  unqualified = <<0x1F575::utf8, 0x200D::utf8, 0x2640::utf8>>

  # woman detective emoji, fully qualified
  fully_qualified =
    <<0x1F575::utf8, 0xFE0F::utf8, 0x200D::utf8, 0x2640::utf8, 0xFE0F::utf8>>

  incoming =
    "test/fixtures/emoji-reaction.json"
    |> File.read!()
    |> Jason.decode!()
    |> Map.merge(%{
      "object" => object_id,
      "actor" => remote_user.ap_id,
      "content" => unqualified
    })

  {:ok, %Activity{data: reaction, local: false}} = Transmogrifier.handle_incoming(incoming)

  assert reaction["actor"] == remote_user.ap_id
  assert reaction["type"] == "EmojiReact"
  assert reaction["id"] == "http://mastodon.example.org/users/admin#reactions/2"
  assert reaction["object"] == object_id

  # The stored reaction must carry the fully-qualified form.
  assert reaction["content"] == fully_qualified

  reacted_object = Object.get_by_ap_id(reaction["object"])

  assert reacted_object.data["reaction_count"] == 1
  assert [[^fully_qualified, _, _]] = reacted_object.data["reactions"]
end
test "it reject invalid emoji reactions" do
user = insert(:user)
other_user = insert(:user, local: false)