rdf-ex/lib/rdf/xsd/utils/regex.ex
2020-06-29 10:37:42 +02:00

86 lines
2.6 KiB
Elixir

defmodule RDF.XSD.Utils.Regex do
  # The leading `!` negates the heredoc, so `@moduledoc` is effectively set to
  # `false`: the module is hidden from generated docs while the explanatory text
  # stays in the source for maintainers.
  @moduledoc !"""
  XSD-flavoured regex matching.

  This is not intended to be used directly.
  Use `c:RDF.XSD.Datatype.matches?/3` implementations on the datatypes or
  `RDF.Literal.matches?/3` instead.
  """

  require Integer

  @doc """
  Matches the string representation of the given value against a XPath and XQuery regular expression pattern.

  The regular expression language is defined in _XQuery 1.0 and XPath 2.0 Functions and Operators_.

  `value` is converted with `to_string/1` before matching. `pattern` and `flags`
  must be binaries.

  Returns `true` if the value matches, `false` otherwise. Raises a `RuntimeError`
  when `xpath_pattern/2` reports an error for the given pattern or flags.

  see <https://www.w3.org/TR/xpath-functions/#func-matches>
  """
  @spec matches?(String.Chars.t(), String.t(), String.t()) :: boolean
  def matches?(value, pattern, flags \\ "") do
    string = to_string(value)

    case xpath_pattern(pattern, flags) do
      {:regex, regex} ->
        Regex.match?(regex, string)

      {:q, literal} ->
        String.contains?(string, literal)

      {:qi, literal} ->
        # Case-insensitive literal search: compare the downcased forms.
        String.contains?(String.downcase(string), String.downcase(literal))

      {:error, error} ->
        raise "Invalid XQuery regex pattern or flags: #{inspect(error)}"
    end
  end

  @doc """
  Translates a XPath regex pattern and its flags into a matchable form.

  Returns

  - `{:q, pattern}` when the `q` flag is set: the pattern is to be matched literally
  - `{:qi, pattern}` when the `q` and `i` flags are set: the pattern is to be matched
    literally, ignoring case
  - `{:regex, regex}` with the compiled `Regex` otherwise
  - `{:error, reason}` when `Regex.compile/2` rejects the pattern or the flags
  """
  @spec xpath_pattern(String.t(), String.t()) ::
          {:q | :qi, String.t()} | {:regex, Regex.t()} | {:error, any}
  def xpath_pattern(pattern, flags)

  def xpath_pattern(pattern, flags) when is_binary(pattern) and is_binary(flags) do
    q_pattern(pattern, flags) || xpath_regex_pattern(pattern, flags)
  end

  # Returns the literal-match tuple when the `q` flag is present, `nil` otherwise
  # (so the caller can fall back to regex compilation with `||`).
  #
  # Per the XPath flags definition, `q` may be combined with `i`, while `m`, `s`
  # and `x` have no effect in its presence; they are therefore accepted and
  # ignored here instead of silently turning the literal into a real regex.
  # Any other flag character still falls through to `Regex.compile/2`, which
  # reports it as an error.
  defp q_pattern(pattern, flags) do
    if String.contains?(flags, "q") and String.replace(flags, ~r/[qimsx]/, "") == "" do
      {if(String.contains?(flags, "i"), do: :qi, else: :q), pattern}
    end
  end

  # Compiles the pattern as a regex; a failing `Regex.compile/2` result
  # (`{:error, reason}`) is passed through unchanged by `with`.
  defp xpath_regex_pattern(pattern, flags) do
    with {:ok, regex} <-
           pattern
           |> convert_utf_escaping()
           |> Regex.compile(xpath_regex_flags(flags)) do
      {:regex, regex}
    end
  end

  @doc """
  Rewrites `\\UXXXXXXXX` escapes (a backslash, `U` and eight hex digits) in the given string.

  For every occurrence, the backslashes directly in front of the `\\U` are counted:

  - if their number is odd, the occurrence becomes those backslashes followed by
    `\\u{XXXXXX}`, using the last six of the eight hex digits
  - otherwise the occurrence is kept and prefixed with one additional backslash

  All other parts of the string are returned unchanged.
  """
  # NOTE(review): the parity check counts only the backslashes *before* the one
  # belonging to `\U` - confirm this matches the escaping level of the callers.
  @spec convert_utf_escaping(String.t()) :: String.t()
  def convert_utf_escaping(string) do
    xpath_unicode_regex = ~r/(\\*)\\U([0-9]|[A-F]|[a-f]){2}(([0-9]|[A-F]|[a-f]){6})/

    # Arguments: whole match, leading backslashes, last digit of the two-digit
    # prefix (unused), the six-digit codepoint.
    Regex.replace(xpath_unicode_regex, string, fn match, escapes, _prefix_digit, codepoint ->
      # `escapes` consists of backslashes only, so its byte size is its length.
      if Integer.is_odd(byte_size(escapes)) do
        "#{escapes}\\u{#{codepoint}}"
      else
        "\\" <> match
      end
    end)
  end

  # Maps XPath flags to `Regex.compile/2` options: drops `q` (not a regex
  # option) and always enables Unicode mode.
  defp xpath_regex_flags(flags) do
    String.replace(flags, "q", "") <> "u"
  end
end