Improve performance of the Enumerable impls of the RDF data structures

This commit is contained in:
Marcel Otto 2021-03-05 23:23:06 +01:00
parent bb29582695
commit e1680ffa95
7 changed files with 87 additions and 50 deletions

View file

@ -14,6 +14,10 @@ This project adheres to [Semantic Versioning](http://semver.org/) and
### Changed
- the performance of the `Enumerable` protocol implementations of the RDF data
structures was significantly improved (for graphs almost 10x), which in turn
increases the performance of all functions built on top of that, e.g.
the N-Triples and N-Quads encoders
- improvement of the Inspect forms of the RDF data structures: the content is
now enclosed in angle brackets and indented
@ -21,7 +25,7 @@ This project adheres to [Semantic Versioning](http://semver.org/) and
- strings of the form `".0"` and `"0."` weren't recognized as valid XSD float
and double literals
- the Turtle encoder handle base URIs without a trailing slash or hash properly
- the Turtle encoder handles base URIs without a trailing slash or hash properly
(no longer raising a warning and ignoring them)

View file

@ -639,20 +639,15 @@ defmodule RDF.Dataset do
...> {EX.S2, EX.p2, EX.O2},
...> {EX.S1, EX.p2, EX.O3}]) |>
...> RDF.Dataset.statements
[{RDF.iri(EX.S1), RDF.iri(EX.p1), RDF.iri(EX.O1), RDF.iri(EX.Graph)},
{RDF.iri(EX.S1), RDF.iri(EX.p2), RDF.iri(EX.O3)},
{RDF.iri(EX.S2), RDF.iri(EX.p2), RDF.iri(EX.O2)}]
[{RDF.iri(EX.S1), RDF.iri(EX.p2), RDF.iri(EX.O3)},
{RDF.iri(EX.S2), RDF.iri(EX.p2), RDF.iri(EX.O2)},
{RDF.iri(EX.S1), RDF.iri(EX.p1), RDF.iri(EX.O1), RDF.iri(EX.Graph)}]
"""
@spec statements(t) :: [Statement.t()]
def statements(%__MODULE__{} = dataset) do
Enum.reduce(dataset.graphs, [], fn {_, graph}, all_statements ->
statements = Graph.triples(graph)
if graph.name do
Enum.map(statements, fn {s, p, o} -> {s, p, o, graph.name} end)
else
statements
end ++ all_statements
Enum.flat_map(dataset.graphs, fn
{nil, graph} -> Graph.triples(graph)
{name, graph} -> Enum.map(graph, fn {s, p, o} -> {s, p, o, name} end)
end)
end
@ -884,21 +879,16 @@ defmodule RDF.Dataset do
# Membership check: delegates to `Dataset.include?/2` and wraps the boolean
# result in the `{:ok, boolean}` shape.
def member?(dataset, statement), do: {:ok, Dataset.include?(dataset, statement)}
# Element count: delegates to `Dataset.statement_count/1`, wrapped in `{:ok, count}`.
def count(dataset), do: {:ok, Dataset.statement_count(dataset)}
def slice(_dataset), do: {:error, __MODULE__}
def reduce(%Dataset{graphs: graphs}, {:cont, acc}, _fun)
when map_size(graphs) == 0,
do: {:done, acc}
def reduce(%Dataset{} = dataset, {:cont, acc}, fun) do
{statement, rest} = Dataset.pop(dataset)
reduce(rest, fun.(statement, acc), fun)
# Returns `{:ok, size, slicing_fun}`, where `size` is the number of statements
# in the dataset and `slicing_fun` takes a start position and an amount.
def slice(dataset) do
  size = Dataset.statement_count(dataset)

  # The statement list is only materialized when the slicing function is
  # actually invoked. Slicing goes through the public `Enum.slice/3` instead of
  # `Enumerable.List.slice/4`, which is not part of the documented Elixir API.
  slicing_fun = fn start, amount ->
    dataset
    |> Dataset.statements()
    |> Enum.slice(start, amount)
  end

  {:ok, size, slicing_fun}
end
def reduce(_, {:halt, acc}, _fun), do: {:halted, acc}
def reduce(%Dataset{} = dataset, {:suspend, acc}, fun) do
{:suspended, acc, &reduce(dataset, &1, fun)}
# Reduces over the statements of the dataset by materializing them as a list
# and handing that list, together with the untouched accumulator command and
# reducer function, to the list implementation of `Enumerable`.
def reduce(dataset, acc, fun) do
  statements = Dataset.statements(dataset)
  Enumerable.List.reduce(statements, acc, fun)
end
end

View file

@ -622,7 +622,11 @@ defmodule RDF.Description do
The list of all triples within a `RDF.Description`.
"""
@spec triples(t) :: keyword
def triples(%__MODULE__{} = description), do: Enum.to_list(description)
# Builds one `{subject, predicate, object}` tuple for every object stored
# under every predicate of the description.
def triples(%__MODULE__{subject: s} = description) do
  for {predicate, objects} <- description.predications,
      {object, _} <- objects do
    {s, predicate, object}
  end
end
defdelegate statements(description), to: __MODULE__, as: :triples
@ -821,22 +825,18 @@ defmodule RDF.Description do
alias RDF.Description
# Membership check: delegates to `Description.include?/2` and wraps the boolean
# result in the `{:ok, boolean}` shape.
def member?(desc, triple), do: {:ok, Description.include?(desc, triple)}
# Element count: delegates to `Description.statement_count/1`, wrapped in `{:ok, count}`.
def count(desc), do: {:ok, Description.statement_count(desc)}
def slice(_desc), do: {:error, __MODULE__}
def reduce(%Description{predications: predications}, {:cont, acc}, _fun)
when map_size(predications) == 0,
do: {:done, acc}
def reduce(%Description{} = description, {:cont, acc}, fun) do
{triple, rest} = Description.pop(description)
reduce(rest, fun.(triple, acc), fun)
# Returns `{:ok, size, slicing_fun}`, where `size` is the number of triples in
# the description and `slicing_fun` takes a start position and an amount.
def slice(desc) do
  size = Description.statement_count(desc)

  # The triple list is only materialized when the slicing function is actually
  # invoked. Slicing goes through the public `Enum.slice/3` instead of
  # `Enumerable.List.slice/4`, which is not part of the documented Elixir API.
  slicing_fun = fn start, amount ->
    desc
    |> Description.triples()
    |> Enum.slice(start, amount)
  end

  {:ok, size, slicing_fun}
end
def reduce(_, {:halt, acc}, _fun), do: {:halted, acc}
def reduce(%Description{} = description, {:suspend, acc}, fun) do
{:suspended, acc, &reduce(description, &1, fun)}
# Reduces over the triples of the description by materializing them as a list
# and handing that list, together with the untouched accumulator command and
# reducer function, to the list implementation of `Enumerable`.
def reduce(desc, acc, fun) do
  triples = Description.triples(desc)
  Enumerable.List.reduce(triples, acc, fun)
end
end

View file

@ -786,7 +786,11 @@ defmodule RDF.Graph do
{RDF.iri(EX.S2), RDF.iri(EX.p2), RDF.iri(EX.O2)}]
"""
@spec triples(t) :: [Statement.t()]
def triples(%__MODULE__{} = graph), do: Enum.to_list(graph)
# Concatenates the triples of every description held by the graph, in the
# iteration order of the `descriptions` collection.
def triples(%__MODULE__{descriptions: descriptions}) do
  Enum.flat_map(descriptions, fn {_subject, description} ->
    Description.triples(description)
  end)
end
defdelegate statements(graph), to: __MODULE__, as: :triples
@ -1088,21 +1092,16 @@ defmodule RDF.Graph do
# Membership check: delegates to `Graph.include?/2` and wraps the boolean
# result in the `{:ok, boolean}` shape.
def member?(graph, triple), do: {:ok, Graph.include?(graph, triple)}
# Element count: delegates to `Graph.statement_count/1`, wrapped in `{:ok, count}`.
def count(graph), do: {:ok, Graph.statement_count(graph)}
def slice(_graph), do: {:error, __MODULE__}
def reduce(%Graph{descriptions: descriptions}, {:cont, acc}, _fun)
when map_size(descriptions) == 0,
do: {:done, acc}
def reduce(%Graph{} = graph, {:cont, acc}, fun) do
{triple, rest} = Graph.pop(graph)
reduce(rest, fun.(triple, acc), fun)
# Returns `{:ok, size, slicing_fun}`, where `size` is the number of triples in
# the graph and `slicing_fun` takes a start position and an amount.
def slice(graph) do
  size = Graph.statement_count(graph)

  # The triple list is only materialized when the slicing function is actually
  # invoked. Slicing goes through the public `Enum.slice/3` instead of
  # `Enumerable.List.slice/4`, which is not part of the documented Elixir API.
  slicing_fun = fn start, amount ->
    graph
    |> Graph.triples()
    |> Enum.slice(start, amount)
  end

  {:ok, size, slicing_fun}
end
def reduce(_, {:halt, acc}, _fun), do: {:halted, acc}
def reduce(%Graph{} = graph, {:suspend, acc}, fun) do
{:suspended, acc, &reduce(graph, &1, fun)}
# Reduces over the triples of the graph by materializing them as a list and
# handing that list, together with the untouched accumulator command and
# reducer function, to the list implementation of `Enumerable`.
def reduce(graph, acc, fun) do
  triples = Graph.triples(graph)
  Enumerable.List.reduce(triples, acc, fun)
end
end

View file

@ -1765,6 +1765,21 @@ defmodule RDF.DatasetTest do
PrefixMap.new(ex: EX, foo: RDFS)
end
test "statements/1" do
  # Two statements without a graph name and two in the named graph EX.GraphName.
  dataset =
    Dataset.new([
      {EX.S1, EX.p1(), EX.O1},
      {EX.S1, EX.p2(), EX.O2},
      {EX.S1, EX.p2(), EX.O2, EX.GraphName},
      {EX.S2, EX.p2(), EX.O2, EX.GraphName}
    ])

  # Statements without a graph name are expected as triples, the others as
  # quads carrying the graph name.
  expected_statements = [
    {RDF.iri(EX.S1), EX.p1(), RDF.iri(EX.O1)},
    {RDF.iri(EX.S1), EX.p2(), RDF.iri(EX.O2)},
    {RDF.iri(EX.S1), EX.p2(), RDF.iri(EX.O2), RDF.iri(EX.GraphName)},
    {RDF.iri(EX.S2), EX.p2(), RDF.iri(EX.O2), RDF.iri(EX.GraphName)}
  ]

  assert Dataset.statements(dataset) == expected_statements
end
describe "Enumerable protocol" do
test "Enum.count" do
assert Enum.count(Dataset.new(name: EX.foo())) == 0

View file

@ -894,6 +894,21 @@ defmodule RDF.DescriptionTest do
|> Description.equal?(Description.new(EX.S, init: {EX.S, EX.p(), EX.O2}))
end
test "triples/1" do
  # One object under predicate1 and two objects under predicate2.
  description =
    Description.new(EX.Subject,
      init: [
        {EX.predicate1(), EX.Object1},
        {EX.predicate2(), EX.Object2},
        {EX.predicate2(), EX.Object3}
      ]
    )

  # Every predicate/object pair is expected as a full triple with the subject.
  expected_triples = [
    {RDF.iri(EX.Subject), EX.predicate1(), RDF.iri(EX.Object1)},
    {RDF.iri(EX.Subject), EX.predicate2(), RDF.iri(EX.Object2)},
    {RDF.iri(EX.Subject), EX.predicate2(), RDF.iri(EX.Object3)}
  ]

  assert Description.triples(description) == expected_triples
end
describe "Enumerable protocol" do
test "Enum.count" do
assert Enum.count(Description.new(EX.foo())) == 0

View file

@ -1448,6 +1448,20 @@ defmodule RDF.GraphTest do
Graph.new()
end
test "triples/1" do
  graph =
    Graph.new([
      {EX.S1, EX.p1(), EX.O1},
      {EX.S2, EX.p2(), EX.O2},
      {EX.S1, EX.p3(), EX.O3}
    ])

  # The expected list has both EX.S1 triples first, i.e. grouped by subject
  # rather than in the order they were passed to `Graph.new/1`.
  expected_triples = [
    {RDF.iri(EX.S1), EX.p1(), RDF.iri(EX.O1)},
    {RDF.iri(EX.S1), EX.p3(), RDF.iri(EX.O3)},
    {RDF.iri(EX.S2), EX.p2(), RDF.iri(EX.O2)}
  ]

  assert Graph.triples(graph) == expected_triples
end
describe "Enumerable protocol" do
test "Enum.count" do
assert Enum.count(Graph.new(name: EX.foo())) == 0