Fix UTF-16 character handling in regex patterns

This commit is contained in:
Marcel Otto 2019-06-23 15:47:18 +02:00
parent dc2b070e05
commit 49a897d79b
2 changed files with 9 additions and 1 deletion

View file

@ -370,11 +370,18 @@ defmodule RDF.Literal do
end end
# Compiles `pattern` into a regex, honouring the XPath-style `flags`.
#
# The pattern is first run through `convert_utf16_escaping/1` and the flags
# through `xpath_regex_flags/1`; the results are handed to `Regex.compile/2`.
#
# Returns `{:regex, regex}` when compilation succeeds. Any other result of
# `Regex.compile/2` (i.e. `{:error, reason}`) falls through the `with` and is
# returned unchanged.
defp xpath_regex_pattern(pattern, flags) do
  with {:ok, regex} <-
         pattern
         |> convert_utf16_escaping()
         |> Regex.compile(xpath_regex_flags(flags)) do
    {:regex, regex}
  end
end
# Rewrites `\UXXXXXXXX` escapes (backslash, capital U, eight hex digits) in
# `pattern` into the `\x{XXXXXX}` form, using the last six hex digits.
#
# Why `\x{...}`: the regex engine behind `Regex.compile/2` (Erlang's `:re`,
# PCRE) documents `\x{hhh..}` as its hex code point escape and lists `\u` and
# `\U` as unsupported Perl escapes, so emitting `\u{...}` would not yield a
# code point escape.
#
# A `\U` is only converted when it is preceded by an even number of
# backslashes (including none). With an odd number, the backslash in front of
# the `U` is itself escaped, i.e. the pattern asks for a literal backslash
# followed by the text `U...`, and must be left untouched.
#
# The first two hex digits are dropped, as before.
# NOTE(review): presumably they are always "00" for valid code points - confirm
# whether non-zero leading digits should be rejected instead.
#
# Returns the converted pattern string; patterns without such escapes are
# returned unchanged.
defp convert_utf16_escaping(pattern) do
  String.replace(
    pattern,
    # (?<!\\)      - not directly preceded by another backslash
    # ((?:\\\\)*)  - group 1: any number of escaped-backslash pairs, kept as is
    # \\U..{2}     - the escape introducer and the two discarded leading digits
    # (..{6})      - group 2: the six hex digits of the code point
    ~r/(?<!\\)((?:\\\\)*)\\U[0-9A-Fa-f]{2}([0-9A-Fa-f]{6})/,
    "\\1\\x{\\2}"
  )
end
# Translates an XPath-style flag string into the modifier string passed to
# `Regex.compile/2`: every "q" is removed and "u" (Elixir's Unicode modifier)
# is appended, so the regex is always compiled in Unicode mode.
#
# NOTE(review): "q" is presumably stripped because it is handled elsewhere and
# is not a modifier `Regex.compile/2` understands - confirm against the caller.
defp xpath_regex_flags(flags) do
  String.replace(flags, "q", "") <> "u"
end

View file

@ -252,6 +252,7 @@ defmodule RDF.LiteralTest do
{~L"abracadabra", ~L"^bra", false}, {~L"abracadabra", ~L"^bra", false},
{@poem, ~L"Kaum.*krähen", false}, {@poem, ~L"Kaum.*krähen", false},
{@poem, ~L"^Kaum.*gesehen,$", false}, {@poem, ~L"^Kaum.*gesehen,$", false},
{~L"\u{01D4B8}", ~L"\\U0001D4B8", true},
{~L"abracadabra"en, ~L"bra", true}, {~L"abracadabra"en, ~L"bra", true},
{"abracadabra", "bra", true}, {"abracadabra", "bra", true},