From ef218a3a902c302ce1e1e5a9a91270a146bcba17 Mon Sep 17 00:00:00 2001 From: Marcel Otto Date: Fri, 5 Jun 2020 11:10:15 +0200 Subject: [PATCH] Extract BGP query planner --- lib/rdf/query/bgp/simple.ex | 84 ++++++++++++++++++----------- test/unit/query/bgp/simple_test.exs | 35 ++++++++++++ 2 files changed, 88 insertions(+), 31 deletions(-) diff --git a/lib/rdf/query/bgp/simple.ex b/lib/rdf/query/bgp/simple.ex index 49630db..b2082ae 100644 --- a/lib/rdf/query/bgp/simple.ex +++ b/lib/rdf/query/bgp/simple.ex @@ -1,6 +1,53 @@ defmodule RDF.Query.BGP.Simple do @behaviour RDF.Query.BGP + defmodule Planner do + def query_plan(triple_patterns, solved \\ MapSet.new, plan \\ []) + + def query_plan([], _, plan), do: Enum.reverse(plan) + + def query_plan(triple_patterns, solved, plan) do + [next_best | rest] = Enum.sort_by(triple_patterns, &triple_priority/1) + new_solved = MapSet.union(solved, variables(next_best)) + + query_plan( + mark_solved_variables(rest, new_solved), + new_solved, + [next_best | plan]) + end + + defp variables({v1, v2, v3}) when is_binary(v1) and is_binary(v2) and is_binary(v3), do: MapSet.new([v1, v2, v3]) + defp variables({_, v2, v3}) when is_binary(v2) and is_binary(v3), do: MapSet.new([v2, v3]) + defp variables({v1, _, v3}) when is_binary(v1) and is_binary(v3), do: MapSet.new([v1, v3]) + defp variables({v1, v2, _}) when is_binary(v1) and is_binary(v2), do: MapSet.new([v1, v2]) + defp variables({v1, _, _}) when is_binary(v1), do: MapSet.new([v1]) + defp variables({_, v2, _}) when is_binary(v2), do: MapSet.new([v2]) + defp variables({_, _, v3}) when is_binary(v3), do: MapSet.new([v3]) + defp variables(_), do: MapSet.new() + + defp triple_priority({v, v, v}), do: triple_priority({v, :p, :o}) + defp triple_priority({v, v, o}), do: triple_priority({v, :p, o}) + defp triple_priority({v, p, v}), do: triple_priority({v, p, :o}) + defp triple_priority({s, v, v}), do: triple_priority({s, v, :o}) + defp triple_priority({s, p, o}) do + {sp, pp, op} = {value_priority(s), value_priority(p), value_priority(o)} + <<(sp + pp + op) :: size(2), sp :: size(1), pp :: size(1), op :: size(1)>> + end + + defp value_priority(value) when is_binary(value), do: 1 + defp value_priority(_), do: 0 + + defp mark_solved_variables(triple_patterns, solved) do + Enum.map triple_patterns, fn {s, p, o} -> + { + (if is_binary(s) and MapSet.member?(solved, s), do: {s}, else: s), + (if is_binary(p) and MapSet.member?(solved, p), do: {p}, else: p), + (if is_binary(o) and MapSet.member?(solved, o), do: {o}, else: o) + } + end + end + end + alias RDF.{Graph, Description, BlankNode} @blank_node_prefix "_:" @@ -13,8 +60,8 @@ defmodule RDF.Query.BGP.Simple do def query(data, triple_patterns) do triple_patterns |> Stream.map(&convert_blank_nodes/1) - |> Enum.sort_by(&triple_priority/1) - |> do_matching(data) + |> Planner.query_plan() + |> do_query(data) |> Enum.map(&remove_blank_nodes/1) end @@ -34,18 +81,15 @@ defmodule RDF.Query.BGP.Simple do end - defp do_matching(triple_patterns, data, solutions \\ []) + defp do_query(triple_patterns, data, solutions \\ []) - defp do_matching([], _, solutions), do: solutions + defp do_query([], _, solutions), do: solutions - defp do_matching([triple_pattern | remaining], data, acc) do + defp do_query([triple_pattern | remaining], data, acc) do solutions = match(data, triple_pattern, acc) if solutions not in [nil, []] do - remaining - |> mark_solved_variables(solutions) - |> Enum.sort_by(&triple_priority/1) - |> do_matching(data, solutions) + do_query(remaining, data, solutions) else [] end @@ -164,16 +208,6 @@ defmodule RDF.Query.BGP.Simple do end end - defp mark_solved_variables(triple_patterns, [solution | _]) do - Stream.map triple_patterns, fn {s, p, o} -> - { - (if is_binary(s) and Map.has_key?(solution, s), do: {s}, else: s), - (if is_binary(p) and Map.has_key?(solution, p), do: {p}, else: p), - (if is_binary(o) and Map.has_key?(solution, o), do: {o}, else: o) - } - end - end - defp apply_solutions(triple_pattern, solutions) do apply_solution = case triple_pattern do @@ -192,16 +226,4 @@ defmodule RDF.Query.BGP.Simple do solutions end end - - defp triple_priority({v, v, v}), do: triple_priority({v, :p, :o}) - defp triple_priority({v, v, o}), do: triple_priority({v, :p, o}) - defp triple_priority({v, p, v}), do: triple_priority({v, p, :o}) - defp triple_priority({s, v, v}), do: triple_priority({s, v, :o}) - defp triple_priority({s, p, o}) do - {sp, pp, op} = {value_priority(s), value_priority(p), value_priority(o)} - <<(sp + pp + op) :: size(2), sp :: size(1), pp :: size(1), op :: size(1)>> - end - - defp value_priority(value) when is_binary(value), do: 1 - defp value_priority(_), do: 0 end diff --git a/test/unit/query/bgp/simple_test.exs b/test/unit/query/bgp/simple_test.exs index 5cca991..3af058e 100644 --- a/test/unit/query/bgp/simple_test.exs +++ b/test/unit/query/bgp/simple_test.exs @@ -1,6 +1,7 @@ defmodule RDF.Query.BGP.SimpleTest do use RDF.Test.Case + alias RDF.Query.BGP import RDF.Query.BGP.Simple, only: [query: 2] @example_graph Graph.new([ @@ -228,4 +229,38 @@ defmodule RDF.Query.BGP.SimpleTest do }, ] end + + describe "Planner.query_plan/1" do + alias BGP.Simple.Planner + + test "empty" do + assert Planner.query_plan([]) == [] + end + + test "single" do + assert Planner.query_plan([{"a", "b", "c"}]) == [{"a", "b", "c"}] + end + + test "multiple connected" do + assert Planner.query_plan([ + {"a", "b", "c"}, + {"a", "d", ~L"foo"} + ]) == [ + {"a", "d", ~L"foo"}, + {{"a"}, "b", "c"} + ] + + assert Planner.query_plan([ + {"s", "p", "o"}, + {"s2", "p2", "o2"}, + {"s", "p", "o2"}, + {"s4", "p4", ~L"foo"} + ]) == [ + {"s4", "p4", ~L"foo"}, + {"s", "p", "o"}, + {{"s"}, {"p"}, "o2"}, + {"s2", "p2", {"o2"}}, + ] + end + end end