Improve performance of the Enumerable impls of the RDF data structures
This commit is contained in:
parent
bb29582695
commit
e1680ffa95
7 changed files with 87 additions and 50 deletions
|
@ -14,6 +14,10 @@ This project adheres to [Semantic Versioning](http://semver.org/) and
|
|||
|
||||
### Changed
|
||||
|
||||
- the performance of the `Enumerable` protocol implementations of the RDF data
|
||||
structures was significantly improved (for graphs almost 10x), which in turn
|
||||
increases the performance of all functions built on top of that, e.g.
|
||||
the N-Triples and N-Quads encoders
|
||||
- improvement of the Inspect forms of the RDF data structures: the content is
|
||||
now enclosed in angle brackets and indented
|
||||
|
||||
|
@ -21,7 +25,7 @@ This project adheres to [Semantic Versioning](http://semver.org/) and
|
|||
|
||||
- strings of the form `".0"` and `"0."` weren't recognized as valid XSD float
|
||||
and double literals
|
||||
- the Turtle encoder handle base URIs without a trailing slash or hash properly
|
||||
- the Turtle encoder handles base URIs without a trailing slash or hash properly
|
||||
(no longer raising a warning and ignoring them)
|
||||
|
||||
|
||||
|
|
|
@ -639,20 +639,15 @@ defmodule RDF.Dataset do
|
|||
...> {EX.S2, EX.p2, EX.O2},
|
||||
...> {EX.S1, EX.p2, EX.O3}]) |>
|
||||
...> RDF.Dataset.statements
|
||||
[{RDF.iri(EX.S1), RDF.iri(EX.p1), RDF.iri(EX.O1), RDF.iri(EX.Graph)},
|
||||
{RDF.iri(EX.S1), RDF.iri(EX.p2), RDF.iri(EX.O3)},
|
||||
{RDF.iri(EX.S2), RDF.iri(EX.p2), RDF.iri(EX.O2)}]
|
||||
[{RDF.iri(EX.S1), RDF.iri(EX.p2), RDF.iri(EX.O3)},
|
||||
{RDF.iri(EX.S2), RDF.iri(EX.p2), RDF.iri(EX.O2)},
|
||||
{RDF.iri(EX.S1), RDF.iri(EX.p1), RDF.iri(EX.O1), RDF.iri(EX.Graph)}]
|
||||
"""
|
||||
@spec statements(t) :: [Statement.t()]
|
||||
def statements(%__MODULE__{} = dataset) do
|
||||
Enum.reduce(dataset.graphs, [], fn {_, graph}, all_statements ->
|
||||
statements = Graph.triples(graph)
|
||||
|
||||
if graph.name do
|
||||
Enum.map(statements, fn {s, p, o} -> {s, p, o, graph.name} end)
|
||||
else
|
||||
statements
|
||||
end ++ all_statements
|
||||
Enum.flat_map(dataset.graphs, fn
|
||||
{nil, graph} -> Graph.triples(graph)
|
||||
{name, graph} -> Enum.map(graph, fn {s, p, o} -> {s, p, o, name} end)
|
||||
end)
|
||||
end
|
||||
|
||||
|
@ -884,21 +879,16 @@ defmodule RDF.Dataset do
|
|||
|
||||
def member?(dataset, statement), do: {:ok, Dataset.include?(dataset, statement)}
|
||||
def count(dataset), do: {:ok, Dataset.statement_count(dataset)}
|
||||
def slice(_dataset), do: {:error, __MODULE__}
|
||||
|
||||
def reduce(%Dataset{graphs: graphs}, {:cont, acc}, _fun)
|
||||
when map_size(graphs) == 0,
|
||||
do: {:done, acc}
|
||||
|
||||
def reduce(%Dataset{} = dataset, {:cont, acc}, fun) do
|
||||
{statement, rest} = Dataset.pop(dataset)
|
||||
reduce(rest, fun.(statement, acc), fun)
|
||||
def slice(dataset) do
|
||||
size = Dataset.statement_count(dataset)
|
||||
{:ok, size, &Enumerable.List.slice(Dataset.statements(dataset), &1, &2, size)}
|
||||
end
|
||||
|
||||
def reduce(_, {:halt, acc}, _fun), do: {:halted, acc}
|
||||
|
||||
def reduce(%Dataset{} = dataset, {:suspend, acc}, fun) do
|
||||
{:suspended, acc, &reduce(dataset, &1, fun)}
|
||||
def reduce(dataset, acc, fun) do
|
||||
dataset
|
||||
|> Dataset.statements()
|
||||
|> Enumerable.List.reduce(acc, fun)
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -622,7 +622,11 @@ defmodule RDF.Description do
|
|||
The list of all triples within a `RDF.Description`.
|
||||
"""
|
||||
@spec triples(t) :: keyword
|
||||
def triples(%__MODULE__{} = description), do: Enum.to_list(description)
|
||||
def triples(%__MODULE__{subject: s} = description) do
|
||||
Enum.flat_map(description.predications, fn {p, os} ->
|
||||
Enum.map(os, fn {o, _} -> {s, p, o} end)
|
||||
end)
|
||||
end
|
||||
|
||||
defdelegate statements(description), to: __MODULE__, as: :triples
|
||||
|
||||
|
@ -821,22 +825,18 @@ defmodule RDF.Description do
|
|||
alias RDF.Description
|
||||
|
||||
def member?(desc, triple), do: {:ok, Description.include?(desc, triple)}
|
||||
|
||||
def count(desc), do: {:ok, Description.statement_count(desc)}
|
||||
def slice(_desc), do: {:error, __MODULE__}
|
||||
|
||||
def reduce(%Description{predications: predications}, {:cont, acc}, _fun)
|
||||
when map_size(predications) == 0,
|
||||
do: {:done, acc}
|
||||
|
||||
def reduce(%Description{} = description, {:cont, acc}, fun) do
|
||||
{triple, rest} = Description.pop(description)
|
||||
reduce(rest, fun.(triple, acc), fun)
|
||||
def slice(desc) do
|
||||
size = Description.statement_count(desc)
|
||||
{:ok, size, &Enumerable.List.slice(Description.triples(desc), &1, &2, size)}
|
||||
end
|
||||
|
||||
def reduce(_, {:halt, acc}, _fun), do: {:halted, acc}
|
||||
|
||||
def reduce(%Description{} = description, {:suspend, acc}, fun) do
|
||||
{:suspended, acc, &reduce(description, &1, fun)}
|
||||
def reduce(desc, acc, fun) do
|
||||
desc
|
||||
|> Description.triples()
|
||||
|> Enumerable.List.reduce(acc, fun)
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -786,7 +786,11 @@ defmodule RDF.Graph do
|
|||
{RDF.iri(EX.S2), RDF.iri(EX.p2), RDF.iri(EX.O2)}]
|
||||
"""
|
||||
@spec triples(t) :: [Statement.t()]
|
||||
def triples(%__MODULE__{} = graph), do: Enum.to_list(graph)
|
||||
def triples(%__MODULE__{} = graph) do
|
||||
Enum.flat_map(graph.descriptions, fn {_, description} ->
|
||||
Description.triples(description)
|
||||
end)
|
||||
end
|
||||
|
||||
defdelegate statements(graph), to: __MODULE__, as: :triples
|
||||
|
||||
|
@ -1088,21 +1092,16 @@ defmodule RDF.Graph do
|
|||
|
||||
def member?(graph, triple), do: {:ok, Graph.include?(graph, triple)}
|
||||
def count(graph), do: {:ok, Graph.statement_count(graph)}
|
||||
def slice(_graph), do: {:error, __MODULE__}
|
||||
|
||||
def reduce(%Graph{descriptions: descriptions}, {:cont, acc}, _fun)
|
||||
when map_size(descriptions) == 0,
|
||||
do: {:done, acc}
|
||||
|
||||
def reduce(%Graph{} = graph, {:cont, acc}, fun) do
|
||||
{triple, rest} = Graph.pop(graph)
|
||||
reduce(rest, fun.(triple, acc), fun)
|
||||
def slice(graph) do
|
||||
size = Graph.statement_count(graph)
|
||||
{:ok, size, &Enumerable.List.slice(Graph.triples(graph), &1, &2, size)}
|
||||
end
|
||||
|
||||
def reduce(_, {:halt, acc}, _fun), do: {:halted, acc}
|
||||
|
||||
def reduce(%Graph{} = graph, {:suspend, acc}, fun) do
|
||||
{:suspended, acc, &reduce(graph, &1, fun)}
|
||||
def reduce(graph, acc, fun) do
|
||||
graph
|
||||
|> Graph.triples()
|
||||
|> Enumerable.List.reduce(acc, fun)
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -1765,6 +1765,21 @@ defmodule RDF.DatasetTest do
|
|||
PrefixMap.new(ex: EX, foo: RDFS)
|
||||
end
|
||||
|
||||
test "statements/1" do
|
||||
assert Dataset.new([
|
||||
{EX.S1, EX.p1(), EX.O1},
|
||||
{EX.S1, EX.p2(), EX.O2},
|
||||
{EX.S1, EX.p2(), EX.O2, EX.GraphName},
|
||||
{EX.S2, EX.p2(), EX.O2, EX.GraphName}
|
||||
])
|
||||
|> Dataset.statements() == [
|
||||
{RDF.iri(EX.S1), EX.p1(), RDF.iri(EX.O1)},
|
||||
{RDF.iri(EX.S1), EX.p2(), RDF.iri(EX.O2)},
|
||||
{RDF.iri(EX.S1), EX.p2(), RDF.iri(EX.O2), RDF.iri(EX.GraphName)},
|
||||
{RDF.iri(EX.S2), EX.p2(), RDF.iri(EX.O2), RDF.iri(EX.GraphName)}
|
||||
]
|
||||
end
|
||||
|
||||
describe "Enumerable protocol" do
|
||||
test "Enum.count" do
|
||||
assert Enum.count(Dataset.new(name: EX.foo())) == 0
|
||||
|
|
|
@ -894,6 +894,21 @@ defmodule RDF.DescriptionTest do
|
|||
|> Description.equal?(Description.new(EX.S, init: {EX.S, EX.p(), EX.O2}))
|
||||
end
|
||||
|
||||
test "triples/1" do
|
||||
assert Description.new(EX.Subject,
|
||||
init: [
|
||||
{EX.predicate1(), EX.Object1},
|
||||
{EX.predicate2(), EX.Object2},
|
||||
{EX.predicate2(), EX.Object3}
|
||||
]
|
||||
)
|
||||
|> Description.triples() == [
|
||||
{RDF.iri(EX.Subject), EX.predicate1(), RDF.iri(EX.Object1)},
|
||||
{RDF.iri(EX.Subject), EX.predicate2(), RDF.iri(EX.Object2)},
|
||||
{RDF.iri(EX.Subject), EX.predicate2(), RDF.iri(EX.Object3)}
|
||||
]
|
||||
end
|
||||
|
||||
describe "Enumerable protocol" do
|
||||
test "Enum.count" do
|
||||
assert Enum.count(Description.new(EX.foo())) == 0
|
||||
|
|
|
@ -1448,6 +1448,20 @@ defmodule RDF.GraphTest do
|
|||
Graph.new()
|
||||
end
|
||||
|
||||
test "triples/1" do
|
||||
assert Graph.new([
|
||||
{EX.S1, EX.p1(), EX.O1},
|
||||
{EX.S2, EX.p2(), EX.O2},
|
||||
{EX.S1, EX.p3(), EX.O3}
|
||||
])
|
||||
|> Graph.triples() ==
|
||||
[
|
||||
{RDF.iri(EX.S1), EX.p1(), RDF.iri(EX.O1)},
|
||||
{RDF.iri(EX.S1), EX.p3(), RDF.iri(EX.O3)},
|
||||
{RDF.iri(EX.S2), EX.p2(), RDF.iri(EX.O2)}
|
||||
]
|
||||
end
|
||||
|
||||
describe "Enumerable protocol" do
|
||||
test "Enum.count" do
|
||||
assert Enum.count(Graph.new(name: EX.foo())) == 0
|
||||
|
|
Loading…
Reference in a new issue