Fix unicode escaping issue in RDF.Literal.matches
This commit is contained in:
parent
49a897d79b
commit
1f07377cae
2 changed files with 25 additions and 3 deletions
|
@ -372,14 +372,31 @@ defmodule RDF.Literal do
|
||||||
# Builds the regex used for XPath-style matching.
#
# The pattern is first passed through `convert_utf_escaping/1` and then
# compiled with the options returned by `xpath_regex_flags/1`.
#
# Returns `{:regex, regex}` when compilation succeeds; any other result of
# `Regex.compile/2` (i.e. its error tuple) is handed back unchanged.
defp xpath_regex_pattern(pattern, flags) do
  compiled =
    pattern
    |> convert_utf_escaping()
    |> Regex.compile(xpath_regex_flags(flags))

  case compiled do
    {:ok, regex} -> {:regex, regex}
    not_compiled -> not_compiled
  end
end
|
||||||
|
|
||||||
# Rewrites every `\UXXXXXXXX` sequence (backslash, capital U, eight hex
# digits) found in `string`, together with any run of backslashes directly
# in front of it.
#
# For each occurrence, `escapes` is the run of backslashes preceding the
# final `\U` backslash, and `codepoint` is the last six of the eight hex
# digits (the two leading digits are dropped):
#
#   * odd number of `escapes`  -> `escapes` followed by `\u{codepoint}`
#   * even number of `escapes` -> the whole occurrence is kept and prefixed
#     with one additional backslash
#
# Text outside of these occurrences is returned untouched.
@doc false
def convert_utf_escaping(string) do
  # Group 1: leading backslashes; group 2: the six significant hex digits.
  xpath_unicode_regex = ~r/(\\*)\\U[0-9A-Fa-f]{2}([0-9A-Fa-f]{6})/

  # A function replacement receives the whole match plus one argument per
  # capture group, and its return value is inserted verbatim.
  Regex.replace(xpath_unicode_regex, string, fn match, escapes, codepoint ->
    # `escapes` contains only ASCII backslashes, so its byte size is its length.
    if rem(byte_size(escapes), 2) == 1 do
      "#{escapes}\\u{#{codepoint}}"
    else
      "\\" <> match
    end
  end)
end
|
||||||
|
|
||||||
defp xpath_regex_flags(flags) do
|
defp xpath_regex_flags(flags) do
|
||||||
|
|
|
@ -252,7 +252,12 @@ defmodule RDF.LiteralTest do
|
||||||
{~L"abracadabra", ~L"^bra", false},
|
{~L"abracadabra", ~L"^bra", false},
|
||||||
{@poem, ~L"Kaum.*krähen", false},
|
{@poem, ~L"Kaum.*krähen", false},
|
||||||
{@poem, ~L"^Kaum.*gesehen,$", false},
|
{@poem, ~L"^Kaum.*gesehen,$", false},
|
||||||
|
{~L"foobar", ~L"foo$", false},
|
||||||
|
|
||||||
|
{~L"noe\u0308l", ~L"noe\\u0308l", true},
|
||||||
|
{~L"noe\\u0308l", ~L"noe\\\\u0308l", true},
|
||||||
{~L"\u{01D4B8}", ~L"\\U0001D4B8", true},
|
{~L"\u{01D4B8}", ~L"\\U0001D4B8", true},
|
||||||
|
{~L"\\U0001D4B8", ~L"\\\U0001D4B8", true},
|
||||||
|
|
||||||
{~L"abracadabra"en, ~L"bra", true},
|
{~L"abracadabra"en, ~L"bra", true},
|
||||||
{"abracadabra", "bra", true},
|
{"abracadabra", "bra", true},
|
||||||
|
|
Loading…
Reference in a new issue