From 1f07377cae026cd3c069bdd23001dda5daaf7ece Mon Sep 17 00:00:00 2001 From: Marcel Otto Date: Sat, 6 Jul 2019 01:59:03 +0200 Subject: [PATCH] Fix unicode escaping issue in RDF.Literal.matches --- lib/rdf/literal.ex | 23 ++++++++++++++++++++--- test/unit/literal_test.exs | 5 +++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/lib/rdf/literal.ex b/lib/rdf/literal.ex index 6e03a93..2b1a1af 100644 --- a/lib/rdf/literal.ex +++ b/lib/rdf/literal.ex @@ -372,14 +372,31 @@ defmodule RDF.Literal do defp xpath_regex_pattern(pattern, flags) do with {:ok, regex} <- pattern - |> convert_utf16_escaping() + |> convert_utf_escaping() |> Regex.compile(xpath_regex_flags(flags)) do {:regex, regex} end end - defp convert_utf16_escaping(pattern) do - String.replace(pattern, ~r/\\U(([0-9]|[A-F]|[a-f]){2})(([0-9]|[A-F]|[a-f]){6})/, "\\u{\\3}") + @doc false + def convert_utf_escaping(string) do + require Integer + + xpath_unicode_regex = ~r/(\\*)\\U([0-9]|[A-F]|[a-f]){2}(([0-9]|[A-F]|[a-f]){6})/ + [first | possible_matches] = + Regex.split(xpath_unicode_regex, string, include_captures: true) + + [first | + Enum.map_every(possible_matches, 2, fn possible_xpath_unicode -> + [_, escapes, _, codepoint, _] = Regex.run(xpath_unicode_regex, possible_xpath_unicode) + if escapes |> String.length() |> Integer.is_odd() do + "#{escapes}\\u{#{codepoint}}" + else + "\\" <> possible_xpath_unicode + end + end) + ] + |> Enum.join() end defp xpath_regex_flags(flags) do diff --git a/test/unit/literal_test.exs b/test/unit/literal_test.exs index a5b69e5..e9eae1d 100644 --- a/test/unit/literal_test.exs +++ b/test/unit/literal_test.exs @@ -252,7 +252,12 @@ defmodule RDF.LiteralTest do {~L"abracadabra", ~L"^bra", false}, {@poem, ~L"Kaum.*krähen", false}, {@poem, ~L"^Kaum.*gesehen,$", false}, + {~L"foobar", ~L"foo$", false}, + + {~L"noe\u0308l", ~L"noe\\u0308l", true}, + {~L"noe\\u0308l", ~L"noe\\\\u0308l", true}, {~L"\u{01D4B8}", ~L"\\U0001D4B8", true}, + {~L"\\U0001D4B8", ~L"\\\U0001D4B8", true}, {~L"abracadabra"en, ~L"bra", true}, {"abracadabra", "bra", true},