defmodule RDF.Dataset do
  @moduledoc """
  Defines a RDF Dataset.

  A `RDF.Dataset` represents a set of `RDF.Graph`s: the graphs are kept in a
  map keyed by their (converted) graph name, with the default graph stored
  under the key `nil`.
  """

  defstruct name: nil, graphs: %{}

  @behaviour Access

  alias RDF.{Graph, Description}

  import RDF.Statement

  # `name` is whatever `RDF.uri/1` returns (or `nil` for an unnamed dataset);
  # `graphs` maps converted graph names (or `nil`) to `RDF.Graph` structs.
  @type t :: %__MODULE__{name: term, graphs: map}

  @doc """
  Creates an empty unnamed `RDF.Dataset`.
  """
  def new, do: %RDF.Dataset{}

  @doc """
  Creates a `RDF.Dataset` from a single argument.

  Depending on the argument this creates

  - an unnamed dataset with an initial statement, when given a tuple
  - an unnamed dataset with initial statements, when given a list
  - an unnamed dataset with the statements of a `RDF.Description`
  - an unnamed dataset with a `RDF.Graph`, stored under the name of that graph
  - an empty named dataset, for any other value, which is used as the name
  """
  def new(statement) when is_tuple(statement), do: new() |> add(statement)

  def new(statements) when is_list(statements), do: new() |> add(statements)

  def new(%Description{} = description), do: new() |> add(description)

  def new(%Graph{} = graph), do: new() |> add(graph, graph.name)

  def new(name), do: %RDF.Dataset{name: RDF.uri(name)}

  @doc """
  Creates a named `RDF.Dataset` with initial content.

  The second argument can be a statement tuple, a list of statements,
  a `RDF.Description` or a `RDF.Graph`. A `RDF.Graph` is stored under its
  own name.
  """
  def new(name, statement) when is_tuple(statement), do: new(name) |> add(statement)

  def new(name, statements) when is_list(statements), do: new(name) |> add(statements)

  def new(name, %Description{} = description), do: new(name) |> add(description)

  def new(name, %Graph{} = graph), do: new(name) |> add(graph, graph.name)

  @doc """
  Adds triples and quads to a `RDF.Dataset`.

  The optional third argument `graph_context`, defaulting to `nil` for the
  default graph, specifies the graph to which the statements are added.
  Quads carry their own graph name, which takes precedence over
  `graph_context`.

  Note: This also applies when adding a named graph. The name of the given
  graph is ignored in favour of `graph_context`, including its default value.
  """
  def add(dataset, statements, graph_context \\ nil)

  def add(dataset, statements, graph_context) when is_list(statements) do
    graph_context = convert_graph_name(graph_context)

    Enum.reduce(statements, dataset, fn statement, acc ->
      add(acc, statement, graph_context)
    end)
  end

  # A triple is turned into a quad carrying the requested graph context.
  def add(dataset, {subject, predicate, objects}, graph_context),
    do: add(dataset, {subject, predicate, objects, graph_context})

  def add(
        %RDF.Dataset{graphs: graphs} = dataset,
        {subject, predicate, objects, graph_context},
        _
      ) do
    graph_context = convert_graph_name(graph_context)
    triple = {subject, predicate, objects}

    updated_graphs =
      Map.update(graphs, graph_context, Graph.new(graph_context, triple), fn graph ->
        Graph.add(graph, triple)
      end)

    %RDF.Dataset{dataset | graphs: updated_graphs}
  end

  def add(%RDF.Dataset{graphs: graphs} = dataset, %Description{} = description, graph_context) do
    graph_context = convert_graph_name(graph_context)

    updated_graph =
      graphs
      |> Map.get(graph_context, Graph.new(graph_context))
      |> Graph.add(description)

    %RDF.Dataset{dataset | graphs: Map.put(graphs, graph_context, updated_graph)}
  end

  def add(%RDF.Dataset{graphs: graphs} = dataset, %Graph{} = graph, graph_context) do
    graph_context = convert_graph_name(graph_context)

    updated_graphs =
      Map.update(graphs, graph_context, Graph.new(graph_context, graph), fn current ->
        Graph.add(current, graph)
      end)

    %RDF.Dataset{dataset | graphs: updated_graphs}
  end

  @doc """
  Adds statements to a `RDF.Dataset` and overwrites all existing statements
  with the same subjects and predicates in the specified graph context.

  ## Examples

      iex> dataset = RDF.Dataset.new({EX.S, EX.P1, EX.O1})
      ...> RDF.Dataset.put(dataset, {EX.S, EX.P1, EX.O2})
      RDF.Dataset.new({EX.S, EX.P1, EX.O2})
      iex> RDF.Dataset.put(dataset, {EX.S, EX.P2, EX.O2})
      RDF.Dataset.new([{EX.S, EX.P1, EX.O1}, {EX.S, EX.P2, EX.O2}])
      iex> RDF.Dataset.new([{EX.S1, EX.P1, EX.O1}, {EX.S2, EX.P2, EX.O2}]) |>
      ...>   RDF.Dataset.put([{EX.S1, EX.P2, EX.O3}, {EX.S2, EX.P2, EX.O3}])
      RDF.Dataset.new([{EX.S1, EX.P1, EX.O1}, {EX.S1, EX.P2, EX.O3}, {EX.S2, EX.P2, EX.O3}])

  Note: When using a map to pass the statements you'll have to take care
  yourself to avoid subject key clashes caused by using inconsistent,
  semantically equivalent forms.

      iex> RDF.Dataset.put(RDF.Dataset.new, %{
      ...>   EX.S => [{EX.P, EX.O1}],
      ...>   {EX.S, nil} => [{EX.P, EX.O2}]})
      RDF.Dataset.new({EX.S, EX.P, EX.O2})

  The last one always wins in these cases. This decision was made to mitigate
  performance drawbacks. The list form will always take care of this for you:

      iex> RDF.Dataset.put(RDF.Dataset.new, [
      ...>   {EX.S, EX.P, EX.O1},
      ...>   {EX.S, EX.P, EX.O2, nil}])
      RDF.Dataset.new([{EX.S, EX.P, EX.O1}, {EX.S, EX.P, EX.O2}])

  """
  def put(dataset, statements, graph_context \\ nil)

  # A triple is turned into a quad carrying the requested graph context.
  def put(%RDF.Dataset{} = dataset, {subject, predicate, objects}, graph_context),
    do: put(dataset, {subject, predicate, objects, graph_context})

  def put(
        %RDF.Dataset{graphs: graphs} = dataset,
        {subject, predicate, objects, graph_context},
        _
      ) do
    graph_context = convert_graph_name(graph_context)
    triple = {subject, predicate, objects}

    new_graph =
      case graphs[graph_context] do
        %Graph{} = graph -> Graph.put(graph, triple)
        nil -> Graph.new(graph_context, triple)
      end

    %RDF.Dataset{dataset | graphs: Map.put(graphs, graph_context, new_graph)}
  end

  def put(%RDF.Dataset{} = dataset, statements, graph_context) when is_list(statements) do
    graph_context = convert_graph_name(graph_context)

    # Group the predications by subject and graph, so every subject is put
    # only once per graph. Subjects of the default graph use the bare subject
    # as key, all others a `{subject, graph}` tuple.
    grouped =
      Enum.group_by(
        statements,
        fn
          {s, _, _, nil} -> s
          {s, _, _, c} -> {s, c}
          {s, _, _} when is_nil(graph_context) -> s
          {s, _, _} -> {s, graph_context}
        end,
        fn
          {_, p, o, _} -> {p, o}
          {_, p, o} -> {p, o}
        end
      )

    # The graph context is already encoded in the keys of the map.
    put(dataset, grouped)
  end

  def put(%RDF.Dataset{graphs: graphs} = dataset, %Description{} = description, graph_context) do
    graph_context = convert_graph_name(graph_context)

    updated_graph =
      graphs
      |> Map.get(graph_context, Graph.new(graph_context))
      |> Graph.put(description)

    %RDF.Dataset{dataset | graphs: Map.put(graphs, graph_context, updated_graph)}
  end

  def put(%RDF.Dataset{graphs: graphs} = dataset, %Graph{} = graph, graph_context) do
    graph_context = convert_graph_name(graph_context)

    updated_graphs =
      Map.update(graphs, graph_context, Graph.new(graph_context, graph), fn current ->
        Graph.put(current, graph)
      end)

    %RDF.Dataset{dataset | graphs: updated_graphs}
  end

  # Must stay behind the struct clauses above, since structs are maps too.
  def put(%RDF.Dataset{} = dataset, statements, graph_context) when is_map(statements) do
    graph_context = convert_graph_name(graph_context)

    Enum.reduce(statements, dataset, fn {subject_with_context, predications}, acc ->
      put(acc, subject_with_context, predications, graph_context)
    end)
  end

  @doc """
  Puts a list of predications for a subject into a graph of a `RDF.Dataset`.

  The subject can be given as a `{subject, graph_context}` tuple, in which
  case a non-`nil` `graph_context` of the tuple takes precedence over the
  graph context given as the last argument.
  """
  def put(
        %RDF.Dataset{graphs: graphs} = dataset,
        {subject, graph_context},
        predications,
        default_graph_context
      )
      when is_list(predications) do
    graph_context = convert_graph_name(graph_context || default_graph_context)
    graph = Map.get(graphs, graph_context, Graph.new(graph_context))
    new_graphs = Map.put(graphs, graph_context, Graph.put(graph, subject, predications))

    %RDF.Dataset{dataset | graphs: new_graphs}
  end

  def put(%RDF.Dataset{} = dataset, subject, predications, graph_context)
      when is_list(predications),
      do: put(dataset, {subject, graph_context}, predications, graph_context)

  @doc """
  Fetches the `RDF.Graph` with the given name.

  When a graph with the given name can not be found `:error` is returned.

  ## Examples

      iex> dataset = RDF.Dataset.new([{EX.S1, EX.P1, EX.O1, EX.Graph}, {EX.S2, EX.P2, EX.O2}])
      ...> RDF.Dataset.fetch(dataset, EX.Graph)
      {:ok, RDF.Graph.new(EX.Graph, {EX.S1, EX.P1, EX.O1})}
      iex> RDF.Dataset.fetch(dataset, nil)
      {:ok, RDF.Graph.new({EX.S2, EX.P2, EX.O2})}
      iex> RDF.Dataset.fetch(dataset, EX.Foo)
      :error

  """
  def fetch(%RDF.Dataset{graphs: graphs}, graph_name) do
    Map.fetch(graphs, convert_graph_name(graph_name))
  end

  @doc """
  Fetches the `RDF.Graph` with the given name.

  When a graph with the given name can not be found the optionally given
  default value or `nil` is returned.

  ## Examples

      iex> dataset = RDF.Dataset.new([{EX.S1, EX.P1, EX.O1, EX.Graph}, {EX.S2, EX.P2, EX.O2}])
      ...> RDF.Dataset.get(dataset, EX.Graph)
      RDF.Graph.new(EX.Graph, {EX.S1, EX.P1, EX.O1})
      iex> RDF.Dataset.get(dataset, nil)
      RDF.Graph.new({EX.S2, EX.P2, EX.O2})
      iex> RDF.Dataset.get(dataset, EX.Foo)
      nil
      iex> RDF.Dataset.get(dataset, EX.Foo, :bar)
      :bar

  """
  def get(%RDF.Dataset{} = dataset, graph_name, default \\ nil) do
    case fetch(dataset, graph_name) do
      {:ok, value} -> value
      :error -> default
    end
  end

  @doc """
  The graph with given name.

  Returns `nil` when the dataset has no graph with that name.
  """
  def graph(%RDF.Dataset{graphs: graphs}, graph_name),
    do: Map.get(graphs, convert_graph_name(graph_name))

  @doc """
  The default graph of a `RDF.Dataset`.

  When the dataset has no default graph yet, the result of `RDF.Graph.new/0`
  is returned.
  """
  def default_graph(%RDF.Dataset{graphs: graphs}),
    do: Map.get(graphs, nil, Graph.new())

  @doc """
  The set of all graphs.

  This returns the map of graphs of the dataset, keyed by graph name.
  """
  def graphs(%RDF.Dataset{graphs: graphs}), do: graphs

  @doc """
  Gets and updates the graph with the given name, in a single pass.

  Invokes the passed function on the `RDF.Graph` with the given name;
  this function should return either `{graph_to_return, new_graph}` or `:pop`.

  If the passed function returns `{graph_to_return, new_graph}`, the
  return value of `get_and_update` is `{graph_to_return, new_dataset}` where
  `new_dataset` is the input `Dataset` updated with `new_graph` for
  the given name.

  If the passed function returns `:pop` the graph with the given name is
  removed and a `{removed_graph, new_dataset}` tuple gets returned.

  Any other return value of the passed function raises a `RuntimeError`.

  ## Examples

      iex> dataset = RDF.Dataset.new({EX.S, EX.P, EX.O, EX.Graph})
      ...> RDF.Dataset.get_and_update(dataset, EX.Graph, fn current_graph ->
      ...>     {current_graph, {EX.S, EX.P, EX.NEW}}
      ...>   end)
      {RDF.Graph.new(EX.Graph, {EX.S, EX.P, EX.O}), RDF.Dataset.new({EX.S, EX.P, EX.NEW, EX.Graph})}

  """
  def get_and_update(%RDF.Dataset{} = dataset, graph_name, fun) do
    graph_context = convert_graph_name(graph_name)

    case fun.(get(dataset, graph_context)) do
      {old_graph, new_graph} ->
        {old_graph, put(dataset, new_graph, graph_context)}

      :pop ->
        pop(dataset, graph_context)

      other ->
        raise "the given function must return a two-element tuple or :pop, got: #{inspect(other)}"
    end
  end

  @doc """
  Pops the graph with the given name.

  Returns a `{graph, new_dataset}` tuple. When a graph with the given name
  can not be found, `{nil, dataset}` with the unchanged dataset is returned.

  ## Examples

      iex> dataset = RDF.Dataset.new([
      ...>   {EX.S1, EX.P1, EX.O1, EX.Graph},
      ...>   {EX.S2, EX.P2, EX.O2}])
      ...> RDF.Dataset.pop(dataset, EX.Graph)
      {RDF.Graph.new(EX.Graph, {EX.S1, EX.P1, EX.O1}), RDF.Dataset.new({EX.S2, EX.P2, EX.O2})}
      iex> RDF.Dataset.pop(dataset, EX.Foo)
      {nil, dataset}

  """
  def pop(%RDF.Dataset{graphs: graphs} = dataset, graph_name) do
    case Map.pop(graphs, convert_graph_name(graph_name)) do
      {nil, _} -> {nil, dataset}
      {graph, new_graphs} -> {graph, %RDF.Dataset{dataset | graphs: new_graphs}}
    end
  end

  @doc """
  The number of statements within a `RDF.Dataset`.

  ## Examples

      iex> RDF.Dataset.new([
      ...>   {EX.S1, EX.p1, EX.O1, EX.Graph},
      ...>   {EX.S2, EX.p2, EX.O2},
      ...>   {EX.S1, EX.p2, EX.O3}]) |>
      ...>   RDF.Dataset.statement_count
      3

  """
  def statement_count(%RDF.Dataset{graphs: graphs}) do
    Enum.reduce(graphs, 0, fn {_, graph}, count ->
      count + Graph.triple_count(graph)
    end)
  end

  @doc """
  The set of all subjects used in the statements within all graphs of a `RDF.Dataset`.

  ## Examples

      iex> RDF.Dataset.new([
      ...>   {EX.S1, EX.p1, EX.O1, EX.Graph},
      ...>   {EX.S2, EX.p2, EX.O2},
      ...>   {EX.S1, EX.p2, EX.O3}]) |>
      ...>   RDF.Dataset.subjects
      MapSet.new([RDF.uri(EX.S1), RDF.uri(EX.S2)])

  """
  def subjects(%RDF.Dataset{graphs: graphs}) do
    Enum.reduce(graphs, MapSet.new(), fn {_, graph}, subjects ->
      MapSet.union(subjects, Graph.subjects(graph))
    end)
  end

  @doc """
  The set of all properties used in the predicates within all graphs of a `RDF.Dataset`.

  ## Examples

      iex> RDF.Dataset.new([
      ...>   {EX.S1, EX.p1, EX.O1, EX.Graph},
      ...>   {EX.S2, EX.p2, EX.O2},
      ...>   {EX.S1, EX.p2, EX.O3}]) |>
      ...>   RDF.Dataset.predicates
      MapSet.new([EX.p1, EX.p2])

  """
  def predicates(%RDF.Dataset{graphs: graphs}) do
    Enum.reduce(graphs, MapSet.new(), fn {_, graph}, predicates ->
      MapSet.union(predicates, Graph.predicates(graph))
    end)
  end

  @doc """
  The set of all resources used in the objects within a `RDF.Dataset`.

  Note: This function does collect only URIs and BlankNodes, not Literals.

  ## Examples

      iex> RDF.Dataset.new([
      ...>   {EX.S1, EX.p1, EX.O1, EX.Graph},
      ...>   {EX.S2, EX.p2, EX.O2, EX.Graph},
      ...>   {EX.S3, EX.p1, EX.O2},
      ...>   {EX.S4, EX.p2, RDF.bnode(:bnode)},
      ...>   {EX.S5, EX.p3, "foo"}
      ...> ]) |> RDF.Dataset.objects
      MapSet.new([RDF.uri(EX.O1), RDF.uri(EX.O2), RDF.bnode(:bnode)])

  """
  def objects(%RDF.Dataset{graphs: graphs}) do
    Enum.reduce(graphs, MapSet.new(), fn {_, graph}, objects ->
      MapSet.union(objects, Graph.objects(graph))
    end)
  end

  @doc """
  The set of all resources used within a `RDF.Dataset`.

  ## Examples

      iex> RDF.Dataset.new([
      ...>   {EX.S1, EX.p1, EX.O1, EX.Graph},
      ...>   {EX.S2, EX.p1, EX.O2, EX.Graph},
      ...>   {EX.S2, EX.p2, RDF.bnode(:bnode)},
      ...>   {EX.S3, EX.p1, "foo"}
      ...> ]) |> RDF.Dataset.resources
      MapSet.new([RDF.uri(EX.S1), RDF.uri(EX.S2), RDF.uri(EX.S3),
        RDF.uri(EX.O1), RDF.uri(EX.O2), RDF.bnode(:bnode), EX.p1, EX.p2])

  """
  def resources(%RDF.Dataset{graphs: graphs}) do
    Enum.reduce(graphs, MapSet.new(), fn {_, graph}, resources ->
      MapSet.union(resources, Graph.resources(graph))
    end)
  end

  @doc """
  All statements within all graphs of a `RDF.Dataset`.

  Statements of named graphs are returned as quads, statements of the
  default graph as triples.

  ## Examples

      iex> RDF.Dataset.new([
      ...>   {EX.S1, EX.p1, EX.O1, EX.Graph},
      ...>   {EX.S2, EX.p2, EX.O2},
      ...>   {EX.S1, EX.p2, EX.O3}]) |>
      ...>   RDF.Dataset.statements
      [{RDF.uri(EX.S1), RDF.uri(EX.p1), RDF.uri(EX.O1), RDF.uri(EX.Graph)},
       {RDF.uri(EX.S1), RDF.uri(EX.p2), RDF.uri(EX.O3)},
       {RDF.uri(EX.S2), RDF.uri(EX.p2), RDF.uri(EX.O2)}]

  """
  def statements(%RDF.Dataset{graphs: graphs}) do
    Enum.reduce(graphs, [], fn {_, graph}, all_statements ->
      triples = Graph.triples(graph)

      graph_statements =
        if graph.name do
          Enum.map(triples, fn {s, p, o} -> {s, p, o, graph.name} end)
        else
          triples
        end

      graph_statements ++ all_statements
    end)
  end

  @doc """
  Returns if a given statement is in a `RDF.Dataset`.

  A triple is looked up in the graph given by the optional `graph_context`
  (the default graph, if omitted); a quad is looked up in its own graph.

  ## Examples

      iex> dataset = RDF.Dataset.new([
      ...>   {EX.S1, EX.p1, EX.O1, EX.Graph},
      ...>   {EX.S2, EX.p2, EX.O2},
      ...>   {EX.S1, EX.p2, EX.O3}])
      ...> RDF.Dataset.include?(dataset, {EX.S1, EX.p1, EX.O1, EX.Graph})
      true

  """
  def include?(dataset, statement, graph_context \\ nil)

  def include?(%RDF.Dataset{graphs: graphs}, {_, _, _} = triple, graph_context) do
    case graphs[convert_graph_name(graph_context)] do
      # `if graph = ...` semantics: a missing graph never includes anything.
      nil -> false
      false -> false
      graph -> Graph.include?(graph, triple)
    end
  end

  def include?(%RDF.Dataset{} = dataset, {subject, predicate, object, graph_context}, _),
    do: include?(dataset, {subject, predicate, object}, graph_context)

  # TODO: Can/should we isolate and move the Enumerable specific part to the Enumerable implementation?

  @doc """
  Reduces the statements of a `RDF.Dataset`, following the contract of
  `Enumerable.reduce/3`.

  The statements are taken one by one with `pop/1`, so they are passed to
  `fun` as quads.
  """
  def reduce(%RDF.Dataset{graphs: graphs}, {:cont, acc}, _fun)
      when map_size(graphs) == 0,
      do: {:done, acc}

  def reduce(%RDF.Dataset{} = dataset, {:cont, acc}, fun) do
    {statement, rest} = RDF.Dataset.pop(dataset)
    reduce(rest, fun.(statement, acc), fun)
  end

  def reduce(_, {:halt, acc}, _fun), do: {:halted, acc}

  def reduce(%RDF.Dataset{} = dataset, {:suspend, acc}, fun) do
    {:suspended, acc, &reduce(dataset, &1, fun)}
  end

  @doc """
  Pops an arbitrary statement from a `RDF.Dataset`.

  Returns a `{quad, new_dataset}` tuple, where the fourth element of the quad
  is the key of the graph the statement was taken from (`nil` for the default
  graph). A graph which becomes empty is removed from the dataset.

  For an empty dataset `{nil, dataset}` is returned.
  """
  def pop(%RDF.Dataset{graphs: graphs} = dataset) when map_size(graphs) == 0,
    do: {nil, dataset}

  def pop(%RDF.Dataset{graphs: graphs} = dataset) do
    # TODO: Find a faster way ...
    [{graph_name, graph}] = Enum.take(graphs, 1)
    {{s, p, o}, popped_graph} = Graph.pop(graph)

    popped =
      if Enum.empty?(popped_graph),
        do: Map.delete(graphs, graph_name),
        else: Map.put(graphs, graph_name, popped_graph)

    {{s, p, o, graph_name}, %RDF.Dataset{dataset | graphs: popped}}
  end
end

defimpl Enumerable, for: RDF.Dataset do
  def reduce(dataset, acc, fun), do: RDF.Dataset.reduce(dataset, acc, fun)

  def member?(dataset, statement), do: {:ok, RDF.Dataset.include?(dataset, statement)}

  def count(dataset), do: {:ok, RDF.Dataset.statement_count(dataset)}

  # No constant-time slicing available; signal Enum to fall back to the
  # default reduce-based algorithm.
  def slice(_dataset), do: {:error, __MODULE__}
end