json_ld: JSON-LD flattening algorithm

Lists are not working yet: supporting them requires a major modification of the specified algorithm, since lists cannot be filled by reference in Elixir.
This commit is contained in:
Marcel Otto 2017-03-18 21:52:41 +01:00
parent 4837169dd0
commit 2405d7d32b
4 changed files with 623 additions and 0 deletions

292
lib/json/ld/flattening.ex Normal file
View file

@ -0,0 +1,292 @@
defmodule JSON.LD.Flattening do
@moduledoc nil
import JSON.LD
alias JSON.LD.NodeIdentifierMap
@doc """
Flattens the given input according to the steps in the JSON-LD Flattening Algorithm.

> Flattening collects all properties of a node in a single JSON object and labels
> all blank nodes with blank node identifiers. This ensures a shape of the data
> and consequently may drastically simplify the code required to process JSON-LD
> in certain applications.

-- <https://www.w3.org/TR/json-ld/#flattened-document-form>

The input is expanded first, a node map is generated from the expanded form and
the nodes of every named graph are attached as `"@graph"` member to the
corresponding node of the default graph. Nodes consisting of nothing but an
`"@id"` are dropped and the remaining nodes are ordered by their identifier.

## Parameters

- `input`: the JSON-LD document to flatten
- `context`: when truthy and the flattened result is non-empty, the result is
  compacted with this context; otherwise the list of flattened nodes is returned
- `opts`: passed on to `compact/3`

Details at <https://www.w3.org/TR/json-ld-api/#flattening-algorithms>
"""
def flatten(input, context \\ nil, opts \\ []) do
  expanded = JSON.LD.expand(input)
  {:ok, node_id_map} = NodeIdentifierMap.start_link()

  node_map =
    try do
      generate_node_map(expanded, %{"@default" => %{}}, node_id_map)
    after
      # The identifier map is only needed while the node map is generated.
      NodeIdentifierMap.stop(node_id_map)
    end

  flattened =
    node_map
    |> Map.delete("@default")
    |> Enum.reduce(node_map["@default"], fn {graph_name, graph}, default_graph ->
      entry = Map.get(default_graph, graph_name, %{"@id" => graph_name})

      # TODO: Spec fixme: Spec doesn't handle the case, when a "@graph" member already exists
      # Existing members keep their order; the nodes of the graph are appended.
      members = Map.get(entry, "@graph", []) ++ graph_members(graph)

      Map.put(default_graph, graph_name, Map.put(entry, "@graph", members))
    end)
    |> graph_members()

  # TODO: Spec fixme: the non-empty check is not in the spec, but in other
  # implementations (Ruby, Java, Go, ...)
  if context && flattened != [] do
    compact(flattened, context, opts)
  else
    flattened
  end
end

# Returns the nodes of a graph ordered by id, without the nodes that consist
# of an "@id" member only.
defp graph_members(graph) do
  graph
  |> Enum.sort_by(fn {id, _} -> id end)
  |> Enum.map(fn {_, node} -> node end)
  |> Enum.reject(&reference_only?/1)
end

# A node whose single member is "@id" carries no information of its own.
defp reference_only?(%{"@id" => _} = node) when map_size(node) == 1, do: true
defp reference_only?(_), do: false
@doc """
Node Map Generation

Adds the nodes found in the expanded JSON-LD `element` to `node_map` and
returns the updated node map.

## Parameters

- `element`: a list or a map of expanded JSON-LD
- `node_map`: the node map built so far (graph name => node id => node)
- `node_id_map`: the `JSON.LD.NodeIdentifierMap` process used to relabel blank nodes
- `active_graph`: name of the graph the nodes are added to
- `active_subject`: id of the node currently processed, or a `%{"@id" => id}`
  map while a reverse property is processed
- `active_property`: the property under which `element` was found
- `list`: a `%{"@list" => items}` map collecting list items, or `nil`

Only the node map is returned, so items collected into a `list` passed by the
caller are not observable by the caller; list objects inside `element` are
collected internally.

Raises `JSON.LD.ConflictingIndexesError` when different `"@index"` values are
found for the same node.

Details at <https://www.w3.org/TR/json-ld-api/#node-map-generation>
"""
def generate_node_map(element, node_map, node_id_map, active_graph \\ "@default",
                      active_subject \\ nil, active_property \\ nil, list \\ nil) do
  {node_map, _list} =
    do_generate_node_map(element, node_map, node_id_map, active_graph,
                         active_subject, active_property, list)

  node_map
end

# The specified algorithm fills `list` by reference. Here the list is threaded
# through the recursion instead: every step returns `{node_map, list}`.

# 1)
defp do_generate_node_map(element, node_map, node_id_map, active_graph, active_subject,
                          active_property, list) when is_list(element) do
  Enum.reduce(element, {node_map, list}, fn item, {node_map, list} ->
    do_generate_node_map(item, node_map, node_id_map, active_graph, active_subject,
                         active_property, list)
  end)
end

# 2)
defp do_generate_node_map(element, node_map, node_id_map, active_graph, active_subject,
                          active_property, list) when is_map(element) do
  node_map = Map.put_new(node_map, active_graph, %{})
  node = node_map[active_graph][active_subject]

  # 3)
  element = relabel_blank_node_types(element, node_id_map)

  cond do
    # 4)
    Map.has_key?(element, "@value") ->
      cond do
        not is_nil(list) ->
          {node_map, append_to_list(list, element)}

        node ->
          path = [active_graph, active_subject, active_property]
          {append_unique(node_map, path, element), list}

        true ->
          {node_map, list}
      end

    # 5)
    Map.has_key?(element, "@list") ->
      {node_map, result} =
        do_generate_node_map(element["@list"], node_map, node_id_map, active_graph,
                             active_subject, active_property, %{"@list" => []})

      node_map =
        if node do
          update_in(node_map, [active_graph, active_subject, active_property], fn
            nil -> [result]
            items -> items ++ [result]
          end)
        else
          node_map
        end

      {node_map, list}

    # 6)
    true ->
      add_node_object(element, node_map, node_id_map, active_graph, active_subject,
                      active_property, list)
  end
end

# 3) Replaces blank node identifiers used as types by their generated label.
# Value objects carry a single type, node objects a list of types; the shape
# of the "@type" member is kept.
defp relabel_blank_node_types(element, node_id_map) do
  case Map.get(element, "@type") do
    types when is_list(types) ->
      Map.put(element, "@type", Enum.map(types, &relabel_blank_node(&1, node_id_map)))

    type when is_binary(type) ->
      Map.put(element, "@type", relabel_blank_node(type, node_id_map))

    _ ->
      element
  end
end

defp relabel_blank_node(identifier, node_id_map) do
  if blank_node_id?(identifier) do
    NodeIdentifierMap.generate_blank_node_id(node_id_map, identifier)
  else
    identifier
  end
end

# 6) Adds a node object and everything reachable from it to the node map.
defp add_node_object(element, node_map, node_id_map, active_graph, active_subject,
                     active_property, list) do
  # 6.1) and 6.2)
  {id, element} = Map.pop(element, "@id")

  id =
    cond do
      !id -> NodeIdentifierMap.generate_blank_node_id(node_id_map)
      blank_node_id?(id) -> NodeIdentifierMap.generate_blank_node_id(node_id_map, id)
      true -> id
    end

  # 6.3)
  node_map =
    Map.update!(node_map, active_graph, &Map.put_new(&1, id, %{"@id" => id}))

  {node_map, list} =
    cond do
      # 6.5) a reverse property is processed: the new node points to the
      # active subject, whether or not the property already has values
      is_map(active_subject) ->
        {append_unique(node_map, [active_graph, id, active_property], active_subject), list}

      # 6.6)
      is_nil(active_property) ->
        {node_map, list}

      is_nil(list) ->
        path = [active_graph, active_subject, active_property]
        {append_unique(node_map, path, %{"@id" => id}), list}

      # 6.6.3) TODO: Spec fixme: specs says to add ELEMENT to @list member, should be REFERENCE
      true ->
        {node_map, append_to_list(list, %{"@id" => id})}
    end

  # 6.7)
  {types, element} = Map.pop(element, "@type")

  node_map =
    types
    |> List.wrap()
    |> Enum.reduce(node_map, fn type, node_map ->
      append_unique(node_map, [active_graph, id, "@type"], type)
    end)

  # 6.8)
  {element_index, element} = Map.pop(element, "@index")

  node_map =
    if is_nil(element_index) do
      node_map
    else
      put_index(node_map, active_graph, id, element_index)
    end

  # 6.9)
  {reverse_map, element} = Map.pop(element, "@reverse")

  node_map =
    if reverse_map do
      referenced_node = %{"@id" => id}

      Enum.reduce(reverse_map, node_map, fn {property, values}, node_map ->
        Enum.reduce(values, node_map, fn value, node_map ->
          generate_node_map(value, node_map, node_id_map, active_graph,
                            referenced_node, property)
        end)
      end)
    else
      node_map
    end

  # 6.10)
  {graph, element} = Map.pop(element, "@graph")

  node_map =
    if graph do
      generate_node_map(graph, node_map, node_id_map, id)
    else
      node_map
    end

  # 6.11) properties are processed in lexicographical order, so that blank
  # node labels are generated deterministically
  node_map =
    element
    |> Enum.sort_by(fn {property, _} -> property end)
    |> Enum.reduce(node_map, fn {property, value}, node_map ->
      property = relabel_blank_node(property, node_id_map)

      node_map =
        update_in(node_map, [active_graph, id], &Map.put_new(&1, property, []))

      generate_node_map(value, node_map, node_id_map, active_graph, id, property)
    end)

  {node_map, list}
end

# 6.8) Stores the index of a node or checks it against the one already stored.
defp put_index(node_map, active_graph, id, element_index) do
  node_index = get_in(node_map, [active_graph, id, "@index"])

  cond do
    !node_index ->
      put_in(node_map, [active_graph, id, "@index"], element_index)

    deep_compare(node_index, element_index) ->
      node_map

    true ->
      raise JSON.LD.ConflictingIndexesError,
        message: "Multiple conflicting indexes have been found for the same node."
  end
end

# Appends `item` to the list found under `path`, unless it is already a
# member; a missing list is created.
defp append_unique(node_map, path, item) do
  update_in(node_map, path, fn
    nil -> [item]
    items -> if item in items, do: items, else: items ++ [item]
  end)
end

# Appends `item` to the "@list" member of the list object.
defp append_to_list(list, item) do
  Map.update(list, "@list", [item], fn items -> items ++ [item] end)
end
# Structural comparison in which the order of list elements is irrelevant.
#
# Maps are equal when they have the same keys and all values compare equal.
# Lists are equal when every element of one list can be paired with a distinct
# element of the other list that compares equal, so duplicates count.
# Everything else has to match exactly.
defp deep_compare(v1, v2) when is_map(v1) and is_map(v2) do
  map_size(v1) == map_size(v2) and
    Enum.all?(v1, fn {key, value} ->
      Map.has_key?(v2, key) and deep_compare(value, v2[key])
    end)
end

defp deep_compare(v1, v2) when is_list(v1) and is_list(v2) do
  length(v1) == length(v2) and match_unordered?(v1, v2)
end

defp deep_compare(v, v), do: true
defp deep_compare(_, _), do: false

# Pairs each item with a not yet used candidate that compares equal. Taking
# the first such candidate is sufficient, since equal candidates are
# interchangeable.
defp match_unordered?([], _candidates), do: true

defp match_unordered?([item | rest], candidates) do
  case Enum.find_index(candidates, &deep_compare(item, &1)) do
    nil -> false
    index -> match_unordered?(rest, List.delete_at(candidates, index))
  end
end
end

View file

@ -0,0 +1,51 @@
defmodule JSON.LD.NodeIdentifierMap do
  @moduledoc """
  A process issuing blank node identifiers of the form `_:b0`, `_:b1`, ...

  The process remembers which identifier it issued for a given input
  identifier, so that asking again for the same input returns the same label.
  """

  use GenServer

  # Client API

  @doc """
  Starts an identifier map process; `opts` are passed to `GenServer.start_link/3`.
  """
  def start_link(opts \\ []) do
    GenServer.start_link(__MODULE__, :ok, opts)
  end

  @doc """
  Stops the identifier map process `pid`.
  """
  def stop(pid, reason \\ :normal, timeout \\ :infinity) do
    GenServer.stop(pid, reason, timeout)
  end

  @doc """
  Generate Blank Node Identifier

  Returns the blank node identifier issued for `identifier`, issuing a new one
  on first use. Without an `identifier` a new blank node identifier is issued
  on every call.

  Details at <https://www.w3.org/TR/json-ld-api/#generate-blank-node-identifier>
  """
  def generate_blank_node_id(pid, identifier \\ nil) do
    GenServer.call(pid, {:generate_id, identifier})
  end

  # Server Callbacks

  @impl true
  def init(:ok) do
    {:ok, %{map: %{}, counter: 0}}
  end

  @impl true
  def handle_call({:generate_id, identifier}, _from, %{map: map, counter: counter} = state) do
    # `nil` is never stored as a key, so a missing identifier always misses here.
    case Map.fetch(map, identifier) do
      {:ok, mapped_identifier} ->
        {:reply, mapped_identifier, state}

      :error ->
        blank_node_id = "_:b#{counter}"

        map =
          if identifier do
            Map.put(map, identifier, blank_node_id)
          else
            map
          end

        {:reply, blank_node_id, %{state | map: map, counter: counter + 1}}
    end
  end
end

View file

@ -34,6 +34,9 @@ defmodule JSON.LD do
# Compaction is implemented in JSON.LD.Compaction; `opts` defaults to [].
defdelegate compact(json_ld_object, context, opts \\ []),
to: JSON.LD.Compaction
# Flattening is implemented in JSON.LD.Flattening; `context` defaults to nil
# and `opts` to [].
defdelegate flatten(json_ld_object, context \\ nil, opts \\ []),
to: JSON.LD.Flattening
@doc """
Generator function for `JSON.LD.Context`s.

View file

@ -0,0 +1,277 @@
defmodule JSON.LD.FlatteningTest do
use ExUnit.Case, async: false
alias RDF.NS.RDFS
# Flattens the example document using the document itself as the context
# argument and compares the result with the expected compacted, flattened form.
# The node without an "@id" in the input appears as "_:b0" in the expected output.
test "Flattened form of a JSON-LD document (EXAMPLE 60 and 61 of https://www.w3.org/TR/json-ld/#flattened-document-form)" do
input = Poison.Parser.parse! """
{
"@context": {
"name": "http://xmlns.com/foaf/0.1/name",
"knows": "http://xmlns.com/foaf/0.1/knows"
},
"@id": "http://me.markus-lanthaler.com/",
"name": "Markus Lanthaler",
"knows": [
{
"@id": "http://manu.sporny.org/about#manu",
"name": "Manu Sporny"
},
{
"name": "Dave Longley"
}
]
}
"""
assert JSON.LD.flatten(input, input) == Poison.Parser.parse! """
{
"@context": {
"name": "http://xmlns.com/foaf/0.1/name",
"knows": "http://xmlns.com/foaf/0.1/knows"
},
"@graph": [
{
"@id": "_:b0",
"name": "Dave Longley"
},
{
"@id": "http://manu.sporny.org/about#manu",
"name": "Manu Sporny"
},
{
"@id": "http://me.markus-lanthaler.com/",
"name": "Markus Lanthaler",
"knows": [
{ "@id": "http://manu.sporny.org/about#manu" },
{ "@id": "_:b0" }
]
}
]
}
"""
end
# Table of test cases: each entry maps a test title to the `input` document and
# the expected `output` of `JSON.LD.flatten/1` (called without a context).
# One test per entry is generated by the `Enum.each` at the end of the table.
# Some entries also carry an `options` key, which the generated test does not read.
%{
"single object" => %{
input: %{"@id" => "http://example.com", "@type" => to_string(RDF.uri(RDFS.Resource))},
output: [
%{"@id" => "http://example.com", "@type" => [to_string(RDF.uri(RDFS.Resource))]}
]
},
"embedded object" => %{
input: %{
"@context" => %{
"foaf" => "http://xmlns.com/foaf/0.1/"
},
"@id" => "http://greggkellogg.net/foaf",
"@type" => ["foaf:PersonalProfileDocument"],
"foaf:primaryTopic" => [%{
"@id" => "http://greggkellogg.net/foaf#me",
"@type" => ["foaf:Person"]
}]
},
output: [
%{
"@id" => "http://greggkellogg.net/foaf",
"@type" => ["http://xmlns.com/foaf/0.1/PersonalProfileDocument"],
"http://xmlns.com/foaf/0.1/primaryTopic" => [%{"@id" => "http://greggkellogg.net/foaf#me"}]
},
%{
"@id" => "http://greggkellogg.net/foaf#me",
"@type" => ["http://xmlns.com/foaf/0.1/Person"]
}
]
},
"embedded anon" => %{
input: %{
"@context" => %{
"foaf" => "http://xmlns.com/foaf/0.1/"
},
"@id" => "http://greggkellogg.net/foaf",
"@type" => "foaf:PersonalProfileDocument",
"foaf:primaryTopic" => %{
"@type" => "foaf:Person"
}
},
output: [
%{
"@id" => "_:b0",
"@type" => ["http://xmlns.com/foaf/0.1/Person"]
},
%{
"@id" => "http://greggkellogg.net/foaf",
"@type" => ["http://xmlns.com/foaf/0.1/PersonalProfileDocument"],
"http://xmlns.com/foaf/0.1/primaryTopic" => [%{"@id" => "_:b0"}]
}
]
},
"reverse properties" => %{
input: Poison.Parser.parse!("""
[
{
"@id": "http://example.com/people/markus",
"@reverse": {
"http://xmlns.com/foaf/0.1/knows": [
{
"@id": "http://example.com/people/dave"
},
{
"@id": "http://example.com/people/gregg"
}
]
},
"http://xmlns.com/foaf/0.1/name": [ { "@value": "Markus Lanthaler" } ]
}
]
"""),
output: Poison.Parser.parse!("""
[
{
"@id": "http://example.com/people/dave",
"http://xmlns.com/foaf/0.1/knows": [
{
"@id": "http://example.com/people/markus"
}
]
},
{
"@id": "http://example.com/people/gregg",
"http://xmlns.com/foaf/0.1/knows": [
{
"@id": "http://example.com/people/markus"
}
]
},
{
"@id": "http://example.com/people/markus",
"http://xmlns.com/foaf/0.1/name": [
{
"@value": "Markus Lanthaler"
}
]
}
]
""")
},
"Simple named graph (Wikidata)" => %{
input: Poison.Parser.parse!("""
{
"@context": {
"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
"ex": "http://example.org/",
"xsd": "http://www.w3.org/2001/XMLSchema#",
"ex:locatedIn": {"@type": "@id"},
"ex:hasPopulaton": {"@type": "xsd:integer"},
"ex:hasReference": {"@type": "@id"}
},
"@graph": [
{
"@id": "http://example.org/ParisFact1",
"@type": "rdf:Graph",
"@graph": {
"@id": "http://example.org/location/Paris#this",
"ex:locatedIn": "http://example.org/location/France#this"
},
"ex:hasReference": ["http://www.britannica.com/", "http://www.wikipedia.org/", "http://www.brockhaus.de/"]
},
{
"@id": "http://example.org/ParisFact2",
"@type": "rdf:Graph",
"@graph": {
"@id": "http://example.org/location/Paris#this",
"ex:hasPopulation": 7000000
},
"ex:hasReference": "http://www.wikipedia.org/"
}
]
}
"""),
output: Poison.Parser.parse!("""
[{
"@id": "http://example.org/ParisFact1",
"@type": ["http://www.w3.org/1999/02/22-rdf-syntax-ns#Graph"],
"http://example.org/hasReference": [
{"@id": "http://www.britannica.com/"},
{"@id": "http://www.wikipedia.org/"},
{"@id": "http://www.brockhaus.de/"}
],
"@graph": [{
"@id": "http://example.org/location/Paris#this",
"http://example.org/locatedIn": [{"@id": "http://example.org/location/France#this"}]
}]
}, {
"@id": "http://example.org/ParisFact2",
"@type": ["http://www.w3.org/1999/02/22-rdf-syntax-ns#Graph"],
"http://example.org/hasReference": [{"@id": "http://www.wikipedia.org/"}],
"@graph": [{
"@id": "http://example.org/location/Paris#this",
"http://example.org/hasPopulation": [{"@value": 7000000}]
}]
}]
""")
},
# TODO: @list don't work yet, since the reference-based implementation in the spec is not possible in Elixir
# "Test Manifest (shortened)" => %{
# input: Poison.Parser.parse!("""
# {
# "@id": "",
# "http://example/sequence": {"@list": [
# {
# "@id": "#t0001",
# "http://example/name": "Keywords cannot be aliased to other keywords",
# "http://example/input": {"@id": "error-expand-0001-in.jsonld"}
# }
# ]}
# }
# """),
# output: Poison.Parser.parse!("""
# [{
# "@id": "",
# "http://example/sequence": [{"@list": [{"@id": "#t0001"}]}]
# }, {
# "@id": "#t0001",
# "http://example/input": [{"@id": "error-expand-0001-in.jsonld"}],
# "http://example/name": [{"@value": "Keywords cannot be aliased to other keywords"}]
# }]
# """),
# options: %{}
# },
"@reverse bnode issue (0045)" => %{
input: Poison.Parser.parse!("""
{
"@context": {
"foo": "http://example.org/foo",
"bar": { "@reverse": "http://example.org/bar", "@type": "@id" }
},
"foo": "Foo",
"bar": [ "http://example.org/origin", "_:b0" ]
}
"""),
output: Poison.Parser.parse!("""
[
{
"@id": "_:b0",
"http://example.org/foo": [ { "@value": "Foo" } ]
},
{
"@id": "_:b1",
"http://example.org/bar": [ { "@id": "_:b0" } ]
},
{
"@id": "http://example.org/origin",
"http://example.org/bar": [ { "@id": "_:b0" } ]
}
]
"""),
options: %{}
}
}
# Defines one test per table entry; the entry is handed to the test body
# through the `data` tag, which ExUnit exposes in the test context.
|> Enum.each(fn ({title, data}) ->
@tag data: data
test title, %{data: data} do
assert JSON.LD.flatten(data.input) == data.output
end
end)
end