diff --git a/lib/rdf/query/bgp.ex b/lib/rdf/query/bgp.ex
new file mode 100644
index 0000000..3e7b769
--- /dev/null
+++ b/lib/rdf/query/bgp.ex
@@ -0,0 +1,14 @@
+defmodule RDF.Query.BGP do
+  @moduledoc """
+  An interface for various BGP algorithm implementations.
+
+  Implementations receive the data to be queried first and the list of
+  triple patterns second, and return the list of solutions.
+  """
+
+  @type solution :: map
+  @type solutions :: [solution]
+
+  @callback query(data :: RDF.Data.t, triple_patterns :: list) :: solutions
+
+end
diff --git a/lib/rdf/query/bgp/simple.ex b/lib/rdf/query/bgp/simple.ex
new file mode 100644
index 0000000..49630db
--- /dev/null
+++ b/lib/rdf/query/bgp/simple.ex
@@ -0,0 +1,225 @@
+defmodule RDF.Query.BGP.Simple do
+  @moduledoc """
+  A simple `RDF.Query.BGP` implementation.
+
+  Variables are given as strings. Blank nodes in triple patterns behave like
+  variables, but are removed from the resulting solutions.
+  """
+
+  @behaviour RDF.Query.BGP
+
+  alias RDF.{Graph, Description, BlankNode}
+
+  @blank_node_prefix "_:"
+
+  @impl RDF.Query.BGP
+  def query(data, triple_patterns)
+
+  def query(_, []), do: [%{}]  # https://www.w3.org/TR/sparql11-query/#emptyGroupPattern
+
+  def query(data, triple_patterns) do
+    triple_patterns
+    |> Stream.map(&convert_blank_nodes/1)
+    |> Enum.sort_by(&triple_priority/1)
+    |> do_matching(data)
+    |> Enum.map(&remove_blank_nodes/1)
+  end
+
+
+  # Replaces blank nodes with their string form, so they are handled like variables.
+  defp convert_blank_nodes({%BlankNode{} = s, p, o}), do: convert_blank_nodes({to_string(s), p, o})
+  defp convert_blank_nodes({s, %BlankNode{} = p, o}), do: convert_blank_nodes({s, to_string(p), o})
+  defp convert_blank_nodes({s, p, %BlankNode{} = o}), do: convert_blank_nodes({s, p, to_string(o)})
+  defp convert_blank_nodes(triple_pattern), do: triple_pattern
+
+  # Drops the bindings whose key starts with the blank node prefix from a solution.
+  defp remove_blank_nodes(solution) do
+    solution
+    |> Enum.filter(fn
+      {@blank_node_prefix <> _, _} -> false
+      _ -> true
+    end)
+    |> Map.new
+  end
+
+
+  defp do_matching(triple_patterns, data, solutions \\ [])
+
+  defp do_matching([], _, solutions), do: solutions
+
+  defp do_matching([triple_pattern | remaining], data, acc) do
+    solutions = match(data, triple_pattern, acc)
+
+    if solutions not in [nil, []] do
+      remaining
+      |> mark_solved_variables(solutions)
+      |> Enum.sort_by(&triple_priority/1)
+      |> do_matching(data, solutions)
+    else
+      []
+    end
+  end
+
+
+  # A pattern with already solved variables (marked as `{var}`) is matched once per
+  # existing solution, with the solved variables replaced by their values.
+  defp match(data, {s, p, o} = triple_pattern, existing_solutions)
+      when is_tuple(s) or is_tuple(p) or is_tuple(o) do
+    triple_pattern
+    |> apply_solutions(existing_solutions)
+    |> Enum.flat_map(&(merge_matches(&1, data)))
+  end
+
+  defp match(data, triple_pattern, []), do: match(data, triple_pattern)
+
+  # An independent pattern yields the cross-product with the existing solutions.
+  defp match(data, triple_pattern, existing_solutions) do
+    case match(data, triple_pattern) do
+      # match/2 returns nil when a constant subject, predicate or object is not found
+      nil -> []
+      solutions ->
+        Enum.flat_map(solutions, fn solution ->
+          Enum.map(existing_solutions, &(Map.merge(solution, &1)))
+        end)
+    end
+  end
+
+  defp match(%Graph{descriptions: descriptions}, {subject_variable, _, _} = triple_pattern)
+       when is_binary(subject_variable) do
+    descriptions
+    |> Enum.reduce([], fn ({subject, description}, acc) ->
+      case match(description, solve_variables(subject_variable, subject, triple_pattern)) do
+        nil -> acc
+        solutions ->
+          Enum.map(solutions, fn solution ->
+            Map.put(solution, subject_variable, subject)
+          end) ++ acc
+      end
+    end)
+  end
+
+  defp match(%Graph{} = graph, {subject, _, _} = triple_pattern) do
+    case graph[subject] do
+      nil -> nil
+      description -> match(description, triple_pattern)
+    end
+  end
+
+  defp match(%Description{predications: predications},
+         {_, predicate_variable, object_variable})
+       when is_binary(predicate_variable) and is_binary(object_variable) do
+    if predicate_variable == object_variable do # repeated variable
+      Enum.reduce predications, [], fn ({predicate, objects}, solutions) ->
+        if Map.has_key?(objects, predicate) do
+          [%{predicate_variable => predicate} | solutions]
+        else
+          solutions
+        end
+      end
+    else
+      Enum.reduce predications, [], fn ({predicate, objects}, solutions) ->
+        solutions ++
+          Enum.map(objects, fn {object, _} ->
+            %{predicate_variable => predicate, object_variable => object}
+          end)
+      end
+    end
+  end
+
+  defp match(%Description{predications: predications},
+         {_, predicate_variable, object}) when is_binary(predicate_variable) do
+    predications
+    |> Enum.reduce([], fn ({predicate, objects}, solutions) ->
+      if Map.has_key?(objects, object) do
+        [%{predicate_variable => predicate} | solutions]
+      else
+        solutions
+      end
+    end)
+  end
+
+  defp match(%Description{predications: predications},
+         {_, predicate, object_or_variable}) do
+    case predications[predicate] do
+      nil -> nil
+      objects -> cond do
+        # object_or_variable is a variable
+        is_binary(object_or_variable) ->
+          Enum.map(objects, fn {object, _} ->
+            %{object_or_variable => object}
+          end)
+
+        # object_or_variable is an object
+        Map.has_key?(objects, object_or_variable) ->
+          [%{}]
+
+        # else
+        true ->
+          nil
+      end
+    end
+  end
+
+  # Replaces all occurrences of the variable var in a triple pattern with val.
+  defp solve_variables(var, val, {var, var, var}), do: {val, val, val}
+  defp solve_variables(var, val, {s, var, var}), do: {s, val, val}
+  defp solve_variables(var, val, {var, p, var}), do: {val, p, val}
+  defp solve_variables(var, val, {var, var, o}), do: {val, val, o}
+  defp solve_variables(var, val, {var, p, o}), do: {val, p, o}
+  defp solve_variables(var, val, {s, var, o}), do: {s, val, o}
+  defp solve_variables(var, val, {s, p, var}), do: {s, p, val}
+  defp solve_variables(_, _, pattern), do: pattern
+
+  defp merge_matches({dependent_solution, triple_pattern}, data) do
+    case match(data, triple_pattern) do
+      nil -> []
+      solutions ->
+        Enum.map solutions, fn solution ->
+          Map.merge(dependent_solution, solution)
+        end
+    end
+  end
+
+  # Wraps the variables which are bound in the solutions so far in a one-element tuple.
+  defp mark_solved_variables(triple_patterns, [solution | _]) do
+    Stream.map triple_patterns, fn {s, p, o} ->
+      {
+        (if is_binary(s) and Map.has_key?(solution, s), do: {s}, else: s),
+        (if is_binary(p) and Map.has_key?(solution, p), do: {p}, else: p),
+        (if is_binary(o) and Map.has_key?(solution, o), do: {o}, else: o)
+      }
+    end
+  end
+
+  defp apply_solutions(triple_pattern, solutions) do
+    apply_solution =
+      case triple_pattern do
+        {{s}, {p}, {o}} -> fn solution -> {solution, {solution[s], solution[p], solution[o]}} end
+        {{s}, {p},  o } -> fn solution -> {solution, {solution[s], solution[p], o}} end
+        {{s},  p , {o}} -> fn solution -> {solution, {solution[s], p, solution[o]}} end
+        {{s},  p ,  o } -> fn solution -> {solution, {solution[s], p, o}} end
+        { s , {p}, {o}} -> fn solution -> {solution, {s, solution[p], solution[o]}} end
+        { s , {p},  o } -> fn solution -> {solution, {s, solution[p], o}} end
+        { s ,  p , {o}} -> fn solution -> {solution, {s, p, solution[o]}} end
+        _ -> nil
+      end
+    if apply_solution do
+      Stream.map(solutions, apply_solution)
+    else
+      solutions
+    end
+  end
+
+  # Patterns with fewer unsolved variables sort first; a repeated variable counts only once.
+  defp triple_priority({v, v, v}), do: triple_priority({v, :p, :o})
+  defp triple_priority({v, v, o}), do: triple_priority({v, :p, o})
+  defp triple_priority({v, p, v}), do: triple_priority({v, p, :o})
+  defp triple_priority({s, v, v}), do: triple_priority({s, v, :o})
+  defp triple_priority({s, p, o}) do
+    {sp, pp, op} = {value_priority(s), value_priority(p), value_priority(o)}
+    <<(sp + pp + op) :: size(2), sp :: size(1), pp :: size(1), op :: size(1)>>
+  end
+
+  defp value_priority(value) when is_binary(value), do: 1
+  defp value_priority(_), do: 0
+end
diff --git a/test/unit/query/bgp/simple_test.exs b/test/unit/query/bgp/simple_test.exs
new file mode 100644
index 0000000..5cca991
--- /dev/null
+++ b/test/unit/query/bgp/simple_test.exs
@@ -0,0 +1,238 @@
+defmodule RDF.Query.BGP.SimpleTest do
+  use RDF.Test.Case
+
+  import RDF.Query.BGP.Simple, only: [query: 2]
+
+  @example_graph Graph.new([
+      {EX.s1, EX.p1, EX.o1},
+      {EX.s1, EX.p2, EX.o2},
+      {EX.s3, EX.p3, EX.o2}
+    ])
+
+  test "empty bgp" do
+    assert query(@example_graph, []) == [%{}]
+  end
+
+  test "single {s ?p ?o}" do
+    assert query(@example_graph, [{EX.s1, "p", "o"}]) ==
+      [
+        %{"p" => EX.p1, "o" => EX.o1},
+        %{"p" => EX.p2, "o" => EX.o2}
+      ]
+  end
+
+  test "single {?s ?p o}" do
+    assert query(@example_graph, [{"s", "p", EX.o2}]) ==
+      [
+        %{"s" => EX.s3, "p" => EX.p3},
+        %{"s" => EX.s1, "p" => EX.p2}
+      ]
+  end
+
+  test "single {?s p ?o}" do
+    assert query(@example_graph, [{"s", EX.p3, "o"}]) == [%{"s" => EX.s3, "o" => EX.o2}]
+  end
+
+  test "with no solutions" do
+    assert query(Graph.new(), [{"a", "b", "c"}]) == []
+  end
+
+  test "with solutions on one triple pattern but none on another one" do
+    example_graph = Graph.new([
+      {EX.x, EX.y, EX.z},
+      {EX.y, EX.y, EX.z},
+    ])
+
+    assert query(example_graph, [
+      {"a", EX.p1, ~L"unmatched" },
+      {"a", EX.y, EX.z}
+    ]) == []
+  end
+
+  test "repeated variable: {?a ?a ?b}" do
+    example_graph = Graph.new([
+      {EX.y, EX.y, EX.x},
+      {EX.x, EX.y, EX.y},
+      {EX.y, EX.x, EX.y}
+    ])
+
+    assert query(example_graph, [{"a", "a", "b"}]) == [%{"a" => EX.y, "b" => EX.x}]
+  end
+
+  test "repeated variable: {?a ?b ?a}" do
+    example_graph = Graph.new([
+      {EX.y, EX.y, EX.x},
+      {EX.x, EX.y, EX.y},
+      {EX.y, EX.x, EX.y}
+    ])
+
+    assert query(example_graph, [{"a", "b", "a"}]) == [%{"a" => EX.y, "b" => EX.x}]
+  end
+
+  test "repeated variable: {?b ?a ?a}" do
+    example_graph = Graph.new([
+      {EX.y, EX.y, EX.x},
+      {EX.x, EX.y, EX.y},
+      {EX.y, EX.x, EX.y}
+    ])
+
+    assert query(example_graph, [{"b", "a", "a"}]) == [%{"a" => EX.y, "b" => EX.x}]
+  end
+
+  test "repeated variable: {?a ?a ?a}" do
+    example_graph = Graph.new([
+      {EX.y, EX.y, EX.x},
+      {EX.x, EX.y, EX.y},
+      {EX.y, EX.x, EX.y},
+      {EX.y, EX.y, EX.y},
+    ])
+
+    assert query(example_graph, [{"a", "a", "a"}]) == [%{"a" => EX.y}]
+  end
+
+  test "two connected triple patterns with a match" do
+    assert query(@example_graph, [
+      {EX.s1, "p", "o"},
+      {EX.s3, "p2", "o" }
+    ]) == [%{
+      "p" => EX.p2,
+      "p2" => EX.p3,
+      "o" => EX.o2
+    }]
+
+    assert query(@example_graph, [
+      {EX.s1, "p", "o1"},
+      {EX.s1, "p", "o2"}
+    ]) ==
+    [
+      %{
+        "p" => EX.p1,
+        "o1" => EX.o1,
+        "o2" => EX.o1,
+      },
+      %{
+        "p" => EX.p2,
+        "o1" => EX.o2,
+        "o2" => EX.o2,
+      },
+    ]
+
+    assert query(
+      Graph.new([
+        {EX.s1, EX.p1, EX.o1},
+        {EX.s3, EX.p2, EX.o2},
+        {EX.s3, EX.p3, EX.o1}
+      ]),
+      [
+        {EX.s1, EX.p1, "o"},
+        {EX.s3, "p", "o"}
+      ]) == [%{"p" => EX.p3, "o" => EX.o1}]
+  end
+
+  test "a triple pattern with dependent variables from separate triple patterns" do
+    assert query(
+      Graph.new([
+        {EX.s1, EX.p1, EX.o1},
+        {EX.s2, EX.p2, EX.o2},
+        {EX.s3, EX.p2, EX.o1}
+      ]),
+      [
+        {EX.s1, EX.p1, "o"},
+        {EX.s2, "p", EX.o2},
+        {"s", "p", "o"}
+      ]
+    ) == [
+      %{
+        "s" => EX.s3,
+        "p" => EX.p2,
+        "o" => EX.o1,
+      },
+    ]
+  end
+
+  test "when no solutions" do
+    assert query(@example_graph, [{EX.s, EX.p, "o"}]) == []
+  end
+
+  test "multiple triple patterns with a constant unmatched triple has no solutions" do
+    assert query(@example_graph, [
+      {EX.s1, "p", "o"},
+      {EX.s, EX.p, EX.o}
+    ]) == []
+  end
+
+  test "independent triple pattern with an unknown constant subject has no solutions" do
+    assert query(@example_graph, [
+      {EX.s1, "p", "o"},
+      {EX.s, "p2", "o2"}
+    ]) == []
+  end
+
+  test "independent triple patterns lead to cross-products" do
+    assert query(@example_graph, [
+      {EX.s1, "p1", "o"},
+      {"s", "p2", EX.o2}
+    ]) == [
+      %{
+        "p1" => EX.p1,
+        "o" => EX.o1,
+        "s" => EX.s3,
+        "p2" => EX.p3,
+      },
+      %{
+        "p1" => EX.p2,
+        "o" => EX.o2,
+        "s" => EX.s3,
+        "p2" => EX.p3,
+      },
+      %{
+        "p1" => EX.p1,
+        "o" => EX.o1,
+        "s" => EX.s1,
+        "p2" => EX.p2,
+      },
+      %{
+        "p1" => EX.p2,
+        "o" => EX.o2,
+        "s" => EX.s1,
+        "p2" => EX.p2,
+      },
+    ]
+  end
+
+  test "blank nodes behave like variables, but don't appear in the solution" do
+    assert query(@example_graph, [
+      {EX.s1, "p", RDF.bnode("o")},
+      {EX.s3, "p2", RDF.bnode("o")}
+    ]) == [%{"p" => EX.p2, "p2" => EX.p3}]
+  end
+
+  test "cross-product with blank nodes" do
+    assert query(@example_graph, [
+      {EX.s1, "p1", "o"},
+      {RDF.bnode("s"), "p2", EX.o2}
+    ]) ==
+    [
+      %{
+        "p1" => EX.p1,
+        "o" => EX.o1,
+        "p2" => EX.p3,
+      },
+      %{
+        "p1" => EX.p2,
+        "o" => EX.o2,
+        "p2" => EX.p3,
+      },
+      %{
+        "p1" => EX.p1,
+        "o" => EX.o1,
+        "p2" => EX.p2,
+      },
+      %{
+        "p1" => EX.p2,
+        "o" => EX.o2,
+        "p2" => EX.p2,
+      },
+    ]
+  end
+end