From 489e964c6d5b180848001201c5069db611f1d9aa Mon Sep 17 00:00:00 2001 From: Marcel Otto Date: Sat, 20 Apr 2019 23:33:09 +0200 Subject: [PATCH] Add RDF.Literal.matches?/3 --- CHANGELOG.md | 11 +++++++ lib/rdf/literal.ex | 57 ++++++++++++++++++++++++++++++++++++ test/unit/literal_test.exs | 59 +++++++++++++++++++++++++++++++++++++- 3 files changed, 126 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 18d24d0..65f97d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,17 @@ This project adheres to [Semantic Versioning](http://semver.org/) and [Keep a CHANGELOG](http://keepachangelog.com). +## Unreleased + +### Added + +- `RDF.Literal.matches?/3` for XQuery regex pattern matching + + +[Compare v0.6.0...HEAD](https://github.com/marcelotto/rdf-ex/compare/v0.6.0...HEAD) + + + ## 0.6.0 - 2019-04-06 ### Added diff --git a/lib/rdf/literal.ex b/lib/rdf/literal.ex index 77faae9..4c98ef2 100644 --- a/lib/rdf/literal.ex +++ b/lib/rdf/literal.ex @@ -319,6 +319,63 @@ defmodule RDF.Literal do def compare(_, _), do: nil + + @doc """ + Matches the string representation of the given value against an XPath and XQuery regular expression pattern. + + The regular expression language is defined in _XQuery 1.0 and XPath 2.0 Functions and Operators_. + + The `pattern` and the optional `flags` can be given as an Elixir string or as + `xsd:string` `RDF.Literal`s. 
+ + see <https://www.w3.org/TR/xpath-functions/#func-matches> + """ + def matches?(value, pattern, flags \\ "") + + def matches?(value, %RDF.Literal{datatype: @xsd_string} = pattern, flags), + do: matches?(value, pattern.value, flags) + + def matches?(value, pattern, %RDF.Literal{datatype: @xsd_string} = flags), + do: matches?(value, pattern, flags.value) + + def matches?(value, pattern, flags) when is_binary(pattern) and is_binary(flags) do + string = to_string(value) + case xpath_pattern(pattern, flags) do + {:regex, regex} -> + Regex.match?(regex, string) + + {:q, pattern} -> + String.contains?(string, pattern) + + {:qi, pattern} -> + string + |> String.downcase() + |> String.contains?(String.downcase(pattern)) + + _ -> + raise "Invalid XQuery regex pattern or flags" + end + end + + defp xpath_pattern(pattern, flags) do + q_pattern(pattern, flags) || xpath_regex_pattern(pattern, flags) + end + + defp q_pattern(pattern, flags) do + if String.contains?(flags, "q") and String.replace(flags, ~r/[qimsx]/, "") == "" do + {(if String.contains?(flags, "i"), do: :qi, else: :q), pattern} + end + end + + defp xpath_regex_pattern(pattern, flags) do + with {:ok, regex} <- Regex.compile(pattern, xpath_regex_flags(flags)) do + {:regex, regex} + end + end + + defp xpath_regex_flags(flags) do + String.replace(flags, "q", "") <> "u" + end end defimpl String.Chars, for: RDF.Literal do diff --git a/test/unit/literal_test.exs b/test/unit/literal_test.exs index e0ac470..61ed54c 100644 --- a/test/unit/literal_test.exs +++ b/test/unit/literal_test.exs @@ -232,6 +232,64 @@ defmodule RDF.LiteralTest do end + @poem RDF.string """ + + Kaum hat dies der Hahn gesehen, + Fängt er auch schon an zu krähen: + Kikeriki! Kikikerikih!! + Tak, tak, tak! - da kommen sie. + + """ + + describe "matches?" 
do + test "without flags" do + [ + {~L"abracadabra", ~L"bra", true}, + {~L"abracadabra", ~L"^a.*a$", true}, + {~L"abracadabra", ~L"^bra", false}, + {@poem, ~L"Kaum.*krähen", false}, + {@poem, ~L"^Kaum.*gesehen,$", false}, + + {~L"abracadabra"en, ~L"bra", true}, + {"abracadabra", "bra", true}, + {RDF.integer("42"), ~L"4", true}, + {RDF.integer("42"), ~L"en", false}, + ] + |> Enum.each(fn {literal, pattern, expected_result} -> + result = Literal.matches?(literal, pattern) + assert result == expected_result, + "expected RDF.Literal.matches?(#{inspect literal}, #{inspect pattern}) to return #{inspect expected_result}, but got #{result}" + end) + end + + test "with flags" do + [ + {@poem, ~L"Kaum.*krähen", ~L"s", true}, + {@poem, ~L"^Kaum.*gesehen,$", ~L"m", true}, + {@poem, ~L"kiki", ~L"i", true}, + ] + |> Enum.each(fn {literal, pattern, flags, result} -> + assert Literal.matches?(literal, pattern, flags) == result + end) + end + + test "with q flag" do + [ + {~L"abcd", ~L".*", ~L"q", false}, + {~L"Mr. B. Obama", ~L"B. OBAMA", ~L"iq", true}, + + # If the q flag is used together with the m, s, or x flag, that (m, s, or x) flag has no effect. + {~L"abcd", ~L".*", ~L"mq", false}, + {~L"abcd", ~L".*", ~L"qim", false}, + {~L"abcd", ~L".*", ~L"xqm", false}, + ] + |> Enum.each(fn {literal, pattern, flags, result} -> + assert Literal.matches?(literal, pattern, flags) == result + end) + end + end + + describe "String.Chars protocol implementation" do Enum.each values(:all_plain), fn value -> @tag value: value @@ -258,7 +316,6 @@ defmodule RDF.LiteralTest do assert to_string(literal) == rep end end) - end end