From 1aa2f22b92f79066b580b75db7d70a734850393b Mon Sep 17 00:00:00 2001
From: Marcel Otto
Date: Wed, 10 Jun 2020 01:13:01 +0200
Subject: [PATCH] Add RDF.Query.BGP builder

---
 lib/rdf/exceptions.ex        |   5 ++
 lib/rdf/query/bgp.ex         | 199 ++++++++++++++++++++++++++++++++++++++++++
 test/unit/query/bgp_test.exs | 162 ++++++++++++++++++++++++++++++++++
 3 files changed, 366 insertions(+)
 create mode 100644 test/unit/query/bgp_test.exs

diff --git a/lib/rdf/exceptions.ex b/lib/rdf/exceptions.ex
index ee90f21..1f756d7 100644
--- a/lib/rdf/exceptions.ex
+++ b/lib/rdf/exceptions.ex
@@ -54,3 +54,8 @@ end
 defmodule RDF.Namespace.UndefinedTermError do
   defexception [:message]
 end
+
+
+defmodule RDF.Query.InvalidError do
+  defexception [:message]
+end
diff --git a/lib/rdf/query/bgp.ex b/lib/rdf/query/bgp.ex
index cf89e39..99e0dd3 100644
--- a/lib/rdf/query/bgp.ex
+++ b/lib/rdf/query/bgp.ex
@@ -2,6 +2,9 @@ defmodule RDF.Query.BGP do
   @enforce_keys [:triple_patterns]
   defstruct [:triple_patterns]
 
+  alias RDF.{IRI, BlankNode, Literal, Namespace}
+  import RDF.Utils.Guards
+
   @type variable :: String.t
   @type triple_pattern :: {
     subject :: variable | RDF.Term.t,
@@ -13,6 +16,202 @@ defmodule RDF.Query.BGP do
 
   @type t :: %__MODULE__{triple_patterns: triple_patterns}
 
+  @doc """
+  Creates a `RDF.Query.BGP` struct from a query.
+
+  The `query` is a single triple pattern tuple or a list of such tuples, each in
+  one of the following forms:
+
+  - `{subject, predicate, object}`
+  - `{subject, predicate, object1, object2, ...}` for multiple objects to the
+    same subject-predicate pair
+  - `{subject, [predicate1, object1, ...], [predicate2, object2, ...], ...}` for
+    multiple predicate-object pairs to the same subject
+
+  Atoms ending with `?` are variables, other atoms starting with `_` are blank
+  nodes and `:a` stands for `rdf:type` in predicate position.
+
+  Despite the missing `!`, this is the raising variant: the BGP is returned
+  directly and a `RDF.Query.InvalidError` is raised on an invalid query.
+  """
+  def new(query) do
+    case new!(query) do
+      {:ok, bgp} -> bgp
+      {:error, error} -> raise error
+    end
+  end
+
+  @doc """
+  Creates a `RDF.Query.BGP` struct from a query.
+
+  Accepts the same queries as `new/1`, but despite the `!` it does not raise on
+  an invalid query: it returns `{:ok, bgp}` or `{:error, error}`, where `error`
+  is a `RDF.Query.InvalidError`.
+  """
+  def new!(query) do
+    with {:ok, triple_patterns} <- triple_patterns(query) do
+      {:ok, %__MODULE__{triple_patterns: triple_patterns}}
+    end
+  end
+
+  defp triple_patterns(query) when is_list(query) do
+    result =
+      Enum.reduce_while(query, {:ok, []}, fn triple, {:ok, reversed_patterns} ->
+        case triple_pattern(triple) do
+          # A triple can expand to multiple patterns. They are prepended in
+          # reversed order, so the final reversal restores the query order
+          # without appending to the accumulator on every step.
+          {:ok, patterns} ->
+            {:cont, {:ok, Enum.reverse(List.wrap(patterns), reversed_patterns)}}
+
+          {:error, error} ->
+            {:halt, {:error, error}}
+        end
+      end)
+
+    with {:ok, reversed_patterns} <- result do
+      {:ok, Enum.reverse(reversed_patterns)}
+    end
+  end
+
+  defp triple_patterns(triple_pattern) when is_tuple(triple_pattern),
+    do: triple_patterns([triple_pattern])
+
+  defp triple_patterns(invalid), do: {:error, invalid_triple_pattern_error(invalid)}
+
+  defp triple_pattern({subject, predicate, object})
+       when not is_list(predicate) and not is_list(object) do
+    with {:ok, subject_pattern} <- subject_pattern(subject),
+         {:ok, predicate_pattern} <- predicate_pattern(predicate),
+         {:ok, object_pattern} <- object_pattern(object) do
+      {:ok, {subject_pattern, predicate_pattern, object_pattern}}
+    end
+  end
+
+  defp triple_pattern(combined_pattern)
+       when is_tuple(combined_pattern) and tuple_size(combined_pattern) > 1 do
+    [subject | rest] = Tuple.to_list(combined_pattern)
+
+    case rest do
+      # {subject, predicate, object1, object2, ...}
+      [predicate | [_ | _] = objects] when not is_list(predicate) ->
+        if Enum.any?(objects, &is_list/1) do
+          {:error, invalid_brackets_error()}
+        else
+          objects
+          |> Enum.map(fn object -> {subject, predicate, object} end)
+          |> triple_patterns()
+        end
+
+      # {subject, predicate}: a pattern without any object must not be
+      # silently accepted as an empty list of patterns
+      [predicate] when not is_list(predicate) ->
+        {:error, invalid_triple_pattern_error(combined_pattern)}
+
+      # {subject, [predicate1, object1, ...], [predicate2, object2, ...], ...}
+      predicate_object_pairs ->
+        if Enum.all?(predicate_object_pairs, &match?([_, _ | _], &1)) do
+          predicate_object_pairs
+          |> Enum.flat_map(fn [predicate | objects] ->
+            Enum.map(objects, fn object -> {subject, predicate, object} end)
+          end)
+          |> triple_patterns()
+        else
+          {:error, invalid_brackets_error()}
+        end
+    end
+  end
+
+  # Everything else is no triple pattern at all: non-tuples and tuples with
+  # less than two elements
+  defp triple_pattern(invalid), do: {:error, invalid_triple_pattern_error(invalid)}
+
+  defp invalid_triple_pattern_error(invalid) do
+    %RDF.Query.InvalidError{message: "Invalid triple pattern: #{inspect(invalid)}"}
+  end
+
+  defp invalid_brackets_error do
+    %RDF.Query.InvalidError{message: "Invalid use of predicate-object pair brackets"}
+  end
+
+  defp subject_pattern(subject) do
+    value = variable(subject) || resource(subject)
+
+    if value do
+      {:ok, value}
+    else
+      {:error, %RDF.Query.InvalidError{
+        message: "Invalid subject term in BGP triple pattern: #{inspect subject}"}
+      }
+    end
+  end
+
+  defp predicate_pattern(predicate) do
+    value = variable(predicate) || resource(predicate) || property(predicate)
+
+    if value do
+      {:ok, value}
+    else
+      {:error, %RDF.Query.InvalidError{
+        message: "Invalid predicate term in BGP triple pattern: #{inspect predicate}"}
+      }
+    end
+  end
+
+  defp object_pattern(object) do
+    value = variable(object) || resource(object) || literal(object)
+
+    if value do
+      {:ok, value}
+    else
+      {:error, %RDF.Query.InvalidError{
+        message: "Invalid object term in BGP triple pattern: #{inspect object}"}
+      }
+    end
+  end
+
+  # Atoms ending with "?" are variables; the variable name is the atom without
+  # this suffix
+  defp variable(var) when is_atom(var) do
+    var_string = to_string(var)
+
+    if String.ends_with?(var_string, "?") do
+      var_string
+      |> String.replace_suffix("?", "")
+      |> String.to_atom()
+    end
+  end
+
+  defp variable(_), do: nil
+
+  defp resource(%IRI{} = iri), do: iri
+  defp resource(%URI{} = uri), do: IRI.new(uri)
+  defp resource(%BlankNode{} = bnode), do: bnode
+
+  defp resource(var) when is_ordinary_atom(var) do
+    case to_string(var) do
+      "_" <> bnode ->
+        BlankNode.new(bnode)
+
+      _ ->
+        case Namespace.resolve_term(var) do
+          {:ok, iri} -> iri
+          _ -> nil
+        end
+    end
+  end
+
+  defp resource(_), do: nil
+
+  defp property(:a), do: RDF.type()
+  defp property(_), do: nil
+
+  defp literal(%Literal{} = literal), do: literal
+  defp literal(value), do: Literal.coerce(value)
+
+  @doc """
+  Return a list of all variables in a BGP.
+  """
   def variables(%__MODULE__{triple_patterns: triple_patterns}), do: variables(triple_patterns)
 
   def variables(triple_patterns) when is_list(triple_patterns) do
diff --git a/test/unit/query/bgp_test.exs b/test/unit/query/bgp_test.exs
new file mode 100644
index 0000000..9050f71
--- /dev/null
+++ b/test/unit/query/bgp_test.exs
@@ -0,0 +1,162 @@
+defmodule RDF.Query.BGPTest do
+  use RDF.Test.Case
+
+  alias RDF.Query.BGP
+
+  defp bgp(triple_patterns) when is_list(triple_patterns),
+    do: %BGP{triple_patterns: triple_patterns}
+
+  describe "new/1" do
+    test "empty triple pattern" do
+      assert BGP.new([]) == bgp([])
+    end
+
+    test "one triple pattern doesn't require list brackets" do
+      assert BGP.new({EX.s, EX.p, EX.o}) ==
+               bgp [{EX.s, EX.p, EX.o}]
+    end
+
+    test "variables" do
+      assert BGP.new([{:s?, :p?, :o?}]) == bgp [{:s, :p, :o}]
+    end
+
+    test "blank nodes" do
+      assert BGP.new([{RDF.bnode("s"), RDF.bnode("p"), RDF.bnode("o")}]) ==
+               bgp [{RDF.bnode("s"), RDF.bnode("p"), RDF.bnode("o")}]
+    end
+
+    test "blank nodes as atoms" do
+      assert BGP.new([{:_s, :_p, :_o}]) ==
+               bgp [{RDF.bnode("s"), RDF.bnode("p"), RDF.bnode("o")}]
+    end
+
+    test "variable notation has precedence over blank node notation" do
+      assert BGP.new([{:_s?, :_p?, :_o?}]) == bgp [{:_s, :_p, :_o}]
+    end
+
+    test "IRIs" do
+      assert BGP.new([{
+        RDF.iri("http://example.com/s"),
+        RDF.iri("http://example.com/p"),
+        RDF.iri("http://example.com/o")}]
+      ) == bgp [{EX.s, EX.p, EX.o}]
+
+      assert BGP.new([{
+        ~I<http://example.com/s>,
+        ~I<http://example.com/p>,
+        ~I<http://example.com/o>}]
+      ) == bgp [{EX.s, EX.p, EX.o}]
+
+      assert BGP.new([{EX.s, EX.p, EX.o}]) ==
+               bgp [{EX.s, EX.p, EX.o}]
+    end
+
+    test "vocabulary term atoms" do
+      assert BGP.new([{EX.S, EX.P, EX.O}]) ==
+               bgp [{RDF.iri(EX.S), RDF.iri(EX.P), RDF.iri(EX.O)}]
+    end
+
+    test "special :a atom for rdf:type" do
+      assert BGP.new([{EX.S, :a, EX.O}]) ==
+               bgp [{RDF.iri(EX.S), RDF.type, RDF.iri(EX.O)}]
+    end
+
+    test "URIs" do
+      assert BGP.new([{
+        URI.parse("http://example.com/s"),
+        URI.parse("http://example.com/p"),
+        URI.parse("http://example.com/o")}]
+      ) == bgp [{EX.s, EX.p, EX.o}]
+    end
+
+    test "literals" do
+      assert BGP.new([{EX.s, EX.p, ~L"foo"}]) ==
+               bgp [{EX.s, EX.p, ~L"foo"}]
+    end
+
+    test "values coercible to literals" do
+      assert BGP.new([{EX.s, EX.p, "foo"}]) ==
+               bgp [{EX.s, EX.p, ~L"foo"}]
+      assert BGP.new([{EX.s, EX.p, 42}]) ==
+               bgp [{EX.s, EX.p, RDF.literal(42)}]
+      assert BGP.new([{EX.s, EX.p, true}]) ==
+               bgp [{EX.s, EX.p, XSD.true}]
+    end
+
+    test "literals on non-object positions" do
+      assert_raise RDF.Query.InvalidError, fn ->
+        assert BGP.new([{~L"foo", EX.p, ~L"bar"}])
+      end
+    end
+
+    test "multiple triple patterns" do
+      assert BGP.new([
+        {EX.S, EX.p, :o?},
+        {:o?, EX.p2, 42}
+      ]) ==
+        bgp [
+          {RDF.iri(EX.S), EX.p, :o},
+          {:o, EX.p2, RDF.literal(42)}
+        ]
+    end
+
+    test "multiple objects to the same subject-predicate" do
+      assert BGP.new([{EX.s, EX.p, EX.o1, EX.o2}]) ==
+        bgp [
+          {EX.s, EX.p, EX.o1},
+          {EX.s, EX.p, EX.o2}
+        ]
+
+      assert BGP.new({EX.s, EX.p, EX.o1, EX.o2}) ==
+        bgp [
+          {EX.s, EX.p, EX.o1},
+          {EX.s, EX.p, EX.o2}
+        ]
+
+      assert BGP.new({EX.s, EX.p, :o?, false, 42, "foo"}) ==
+        bgp [
+          {EX.s, EX.p, :o},
+          {EX.s, EX.p, XSD.false},
+          {EX.s, EX.p, RDF.literal(42)},
+          {EX.s, EX.p, RDF.literal("foo")}
+        ]
+    end
+
+    test "multiple predicate-object pairs to the same subject" do
+      assert BGP.new([{
+        EX.s,
+        [EX.p1, EX.o1],
+        [EX.p2, EX.o2],
+      }]) ==
+        bgp [
+          {EX.s, EX.p1, EX.o1},
+          {EX.s, EX.p2, EX.o2}
+        ]
+
+      assert BGP.new([{
+        EX.s,
+        [:a, :o?],
+        [EX.p1, 42, 3.14],
+        [EX.p2, "foo", true],
+      }]) ==
+        bgp [
+          {EX.s, RDF.type, :o},
+          {EX.s, EX.p1, RDF.literal(42)},
+          {EX.s, EX.p1, RDF.literal(3.14)},
+          {EX.s, EX.p2, RDF.literal("foo")},
+          {EX.s, EX.p2, XSD.true}
+        ]
+
+      assert BGP.new([{EX.s, [EX.p, EX.o]}]) ==
+        bgp [{EX.s, EX.p, EX.o}]
+    end
+
+    test "malformed triple patterns" do
+      assert_raise RDF.Query.InvalidError, fn -> BGP.new({}) end
+      assert_raise RDF.Query.InvalidError, fn -> BGP.new({EX.s}) end
+      assert_raise RDF.Query.InvalidError, fn -> BGP.new({EX.s, EX.p}) end
+      assert_raise RDF.Query.InvalidError, fn -> BGP.new([:foo]) end
+      assert_raise RDF.Query.InvalidError, fn -> BGP.new("foo") end
+    end
+  end
+end