368 lines
11 KiB
Elixir
368 lines
11 KiB
Elixir
defmodule RDF.Turtle.Encoder do
|
|
@moduledoc false
|
|
|
|
use RDF.Serialization.Encoder
|
|
|
|
alias RDF.Turtle.Encoder.State
|
|
alias RDF.{IRI, Literal, BlankNode, Description}
|
|
|
|
# Character and width used for one level of nesting in the output
@indentation_char " "
@indentation 4

# Datatypes whose valid literals are written in their bare canonical lexical form
@native_supported_datatypes [
  RDF.Datatype.NS.XSD.boolean(),
  RDF.Datatype.NS.XSD.integer(),
  RDF.Datatype.NS.XSD.double(),
  RDF.Datatype.NS.XSD.decimal()
]

@rdf_type RDF.type()
@rdf_nil RDF.nil()

# Defines rdf:type of subjects to be serialized at the beginning of the encoded graph
@top_classes Enum.map([RDF.NS.RDFS.Class], &RDF.iri/1)

# Defines order of predicates at the beginning of a resource description
@predicate_order [
  RDF.type(),
  RDF.NS.RDFS.label(),
  RDF.iri("http://purl.org/dc/terms/title")
]
@ordered_properties MapSet.new(@predicate_order)
|
|
|
|
|
|
# Encodes `data` as a Turtle document and returns `{:ok, string}`.
#
# Recognized options:
#   :base     - base IRI, emitted as a `@base` directive
#   :prefixes - enumerable of `{prefix, namespace_iri}` pairs, emitted as `@prefix` directives
#
# The State process started for the run is stopped in all cases.
@impl RDF.Serialization.Encoder
def encode(data, opts \\ []) do
  base = opts |> Keyword.get(:base) |> init_base()
  prefixes = opts |> Keyword.get(:prefixes, %{}) |> init_prefixes()
  {:ok, state} = State.start_link(data, base, prefixes)

  try do
    State.preprocess(state)

    directives = base_directive(base) <> prefix_directives(prefixes)
    {:ok, directives <> graph_statements(state)}
  after
    State.stop(state)
  end
end
|
|
|
|
# Normalizes the :base option.
#
# Returns nil when no base is given, `{:ok, base}` when the stringified base
# ends with "/" or "#", and `{:bad, base}` (after emitting a warning) otherwise.
defp init_base(nil), do: nil

defp init_base(base) do
  base_string = to_string(base)

  if String.ends_with?(base_string, ["/", "#"]) do
    {:ok, base_string}
  else
    IO.warn("invalid base: #{base_string}")
    {:bad, base_string}
  end
end
|
|
|
|
# Builds the reverse lookup used for prefixed names: namespace IRI => prefix string.
# When several prefixes map to the same namespace, the last one wins.
defp init_prefixes(nil), do: %{}

defp init_prefixes(prefixes) do
  Map.new(prefixes, fn {prefix, namespace} ->
    {RDF.iri(namespace), to_string(prefix)}
  end)
end
|
|
|
|
|
|
# Renders the `@base` directive, or an empty string when there is no base.
# The directive is emitted for both `{:ok, base}` and `{:bad, base}`.
defp base_directive(nil) do
  ""
end

defp base_directive({_status, base_iri}) do
  "@base <" <> to_string(base_iri) <> "> .\n"
end
|
|
|
|
# Renders a single `@prefix` directive from a `{namespace, prefix}` pair.
defp prefix_directive({namespace, prefix}) do
  "@prefix " <> to_string(prefix) <> ": <" <> to_string(namespace) <> "> .\n"
end
|
|
|
|
# Renders all `@prefix` directives followed by a blank line,
# or an empty string when there are no prefixes.
defp prefix_directives(prefixes) do
  directives = Enum.map(prefixes, &prefix_directive/1)

  if directives == [] do
    ""
  else
    Enum.join(directives) <> "\n"
  end
end
|
|
|
|
|
|
# Renders the statements of all descriptions in the data held by the state,
# in serialization order and separated by newlines. Descriptions for which
# `description_statements/2` yields nil are left out.
defp graph_statements(state) do
  ordered =
    state
    |> State.data()
    |> RDF.Data.descriptions()
    |> order_descriptions(state)

  ordered
  |> Enum.flat_map(fn description ->
    case description_statements(description, state) do
      nil -> []
      statements -> [statements]
    end
  end)
  |> Enum.join("\n")
end
|
|
|
|
# Brings the descriptions into serialization order:
#
#   1. the description of the base IRI (if there is exactly one),
#   2. descriptions typed with one of the @top_classes, class by class,
#   3. all remaining descriptions.
#
# Groups 2 and 3 are each sorted with `sort_description_group/1`.
defp order_descriptions(descriptions, state) do
  base_iri = State.base_iri(state)

  groups =
    Enum.group_by(descriptions, fn
      %Description{subject: ^base_iri} ->
        :base

      description ->
        case description.predications[@rdf_type] do
          nil -> :other
          types -> Enum.find(@top_classes, :other, &Map.has_key?(types, &1))
        end
    end)

  top_class_descriptions =
    Enum.flat_map(@top_classes, fn top_class ->
      case groups[top_class] do
        nil -> []
        class_group -> sort_description_group(class_group)
      end
    end)

  other_descriptions = groups |> Map.get(:other, []) |> sort_description_group()
  ordered = top_class_descriptions ++ other_descriptions

  case groups[:base] do
    [base_description] -> [base_description | ordered]
    _ -> ordered
  end
end
|
|
|
|
# Sorts descriptions so that IRI subjects come before blank node subjects;
# any other pair of subjects is ordered by string representation.
defp sort_description_group(descriptions) do
  Enum.sort(descriptions, fn %Description{subject: left}, %Description{subject: right} ->
    case {left, right} do
      {%IRI{}, %BlankNode{}} -> true
      {%BlankNode{}, %IRI{}} -> false
      _ -> to_string(left) < to_string(right)
    end
  end)
end
|
|
|
|
# Renders the statements of one description.
#
# Blank node subjects with a reference count below 2 are handed to
# `unrefed_bnode_subject_term/4` (which may return nil); everything else
# is written out as a full subject block.
defp description_statements(description, state, nesting \\ 0) do
  case description.subject do
    %BlankNode{} = bnode ->
      ref_count = State.bnode_ref_counter(state, bnode)

      if ref_count < 2 do
        unrefed_bnode_subject_term(description, ref_count, state, nesting)
      else
        full_description_statements(description, state, nesting)
      end

    _ ->
      full_description_statements(description, state, nesting)
  end
end
|
|
|
|
# Renders an already encoded subject followed by its predications, indented
# one level deeper than `nesting`, and terminated with " .\n".
defp full_description_statements(subject, description, state, nesting) do
  indented = nesting + @indentation
  line_start = newline_indent(indented)
  body = predications(description, state, indented)

  subject <> line_start <> body <> " .\n"
end
|
|
|
|
# Encodes the subject of the description and renders the full statement block.
defp full_description_statements(description, state, nesting) do
  subject_term = term(description.subject, state, :subject, nesting)
  full_description_statements(subject_term, description, state, nesting)
end
|
|
|
|
# Renders the description as a bracketed blank node property list (`[ ... ]`),
# with the predications indented one level deeper than the brackets.
defp blank_node_property_list(description, state, nesting) do
  inner = nesting + @indentation
  opening = newline_indent(inner)
  body = predications(description, state, inner)
  closing = newline_indent(nesting)

  "[#{opening}#{body}#{closing}]"
end
|
|
|
|
# Renders all predications of a description, one per line, joined with " ;".
defp predications(description, state, nesting) do
  separator = " ;" <> newline_indent(nesting)

  description.predications
  |> order_predications()
  |> Enum.map_join(separator, &predication(&1, state, nesting))
end
|
|
|
|
# Returns the predications as a list of `{predicate, objects}` pairs: first the
# ones listed in @predicate_order (in that order, if present), then all others.
defp order_predications(predications) do
  leading =
    Enum.flat_map(@predicate_order, fn predicate ->
      case predications[predicate] do
        nil -> []
        objects -> [{predicate, objects}]
      end
    end)

  remaining =
    Enum.filter(predications, fn {predicate, _objects} ->
      not MapSet.member?(@ordered_properties, predicate)
    end)

  leading ++ remaining
end
|
|
|
|
# Renders one predicate followed by its comma-separated objects.
defp predication({predicate, objects}, state, nesting) do
  predicate_term = term(predicate, state, :predicate, nesting)

  # TODO: split if the line gets too long
  object_terms =
    Enum.map_join(objects, ", ", fn {object, _} ->
      term(object, state, :object, nesting)
    end)

  predicate_term <> " " <> object_terms
end
|
|
|
|
|
|
# Renders a description whose blank node subject is referenced at most once.
#
#   * ref_count 0, list node: the list in `( ... )` syntax as subject, followed
#     by the remaining (non-list) statements
#   * ref_count 0, otherwise: a top-level blank node property list
#   * ref_count 1: nil (nothing is rendered from here)
#
# Any other ref_count raises.
defp unrefed_bnode_subject_term(bnode_description, ref_count, state, nesting) do
  list_node? = valid_list_node?(bnode_description.subject, state)

  case {ref_count, list_node?} do
    {0, true} ->
      list = list_term(bnode_description.subject, state, nesting)

      full_description_statements(
        list,
        list_subject_description(bnode_description),
        state,
        nesting
      )

    {0, false} ->
      blank_node_property_list(bnode_description, state, nesting) <> " .\n"

    {1, _} ->
      nil

    _ ->
      raise "Internal error: This shouldn't happen. Please raise an issue in the RDF.ex project with the input document causing this error."
  end
end
|
|
|
|
# Strips the rdf:first/rdf:rest statements from the description of a list head.
# If nothing remains, an rdf:type rdf:List statement is added instead.
defp list_subject_description(description) do
  stripped = Description.delete_predicates(description, [RDF.first(), RDF.rest()])

  if Enum.empty?(stripped.predications) do
    # since the Turtle grammar doesn't allow bare lists, we add a statement
    RDF.type(stripped, RDF.List)
  else
    stripped
  end
end
|
|
|
|
# Renders a blank node in object position that is referenced at most once:
# as a list if it is a list node, otherwise (ref_count 1) as a nested blank
# node property list built from its description. Anything else raises.
defp unrefed_bnode_object_term(bnode, ref_count, state, nesting) do
  cond do
    valid_list_node?(bnode, state) ->
      list_term(bnode, state, nesting)

    ref_count == 1 ->
      state
      |> State.data()
      |> RDF.Data.description(bnode)
      |> blank_node_property_list(state, nesting)

    true ->
      raise "Internal error: This shouldn't happen. Please raise an issue in the RDF.ex project with the input document causing this error."
  end
end
|
|
|
|
# Tells whether the blank node is among the list nodes recorded in the state.
defp valid_list_node?(bnode, state) do
  state
  |> State.list_nodes()
  |> MapSet.member?(bnode)
end
|
|
|
|
# Renders the list starting at `head` from the values the state provides for it.
defp list_term(head, state, nesting) do
  values = State.list_values(head, state)
  term(values, state, :list, nesting)
end
|
|
|
|
|
|
# rdf:type in predicate position is abbreviated to `a`.
defp term(@rdf_type, _state, :predicate, _nesting) do
  "a"
end

# rdf:nil is written as the empty list in every position.
defp term(@rdf_nil, _state, _position, _nesting) do
  "()"
end

# IRIs: relative to the base if possible, else as a prefixed name if a prefix
# for the namespace is defined, else as a full `<...>` reference.
defp term(%IRI{} = iri, state, _position, _nesting) do
  with nil <- based_name(iri, State.base(state)),
       nil <- prefixed_name(iri, State.prefixes(state)) do
    "<" <> to_string(iri) <> ">"
  end
end
|
|
|
|
# Blank nodes in object or list position: inlined when referenced at most
# once, otherwise written as a blank node label.
defp term(%BlankNode{} = bnode, state, position, nesting)
     when position in [:object, :list] do
  case State.bnode_ref_counter(state, bnode) do
    ref_count when ref_count <= 1 ->
      unrefed_bnode_object_term(bnode, ref_count, state, nesting)

    _ ->
      to_string(bnode)
  end
end

# Blank nodes in any other position are always written as a label.
defp term(%BlankNode{} = bnode, _state, _position, _nesting) do
  to_string(bnode)
end
|
|
|
|
# Language-tagged literals: the quoted value followed by `@` and the language tag.
#
# The value goes through `quoted/1` so that quotes, backslashes and line breaks
# in it cannot break out of the string literal. (This clause used to be defined
# twice; the second, unreachable copy has been dropped.)
defp term(%Literal{value: value, language: language}, _state, _position, _nesting)
     when not is_nil(language) do
  quoted(to_string(value)) <> "@" <> to_string(language)
end
|
|
|
|
# Plain string literals are written as a quoted string without a datatype.
defp term(%Literal{datatype: datatype} = literal, _state, _position, _nesting)
     when is_xsd_string(datatype) do
  quoted(Literal.lexical(literal))
end
|
|
|
|
# Literals of a natively supported datatype are written as their bare canonical
# lexical form when valid; invalid ones fall back to the typed literal syntax.
defp term(%Literal{datatype: datatype} = literal, state, _position, nesting)
     when datatype in @native_supported_datatypes do
  if Literal.valid?(literal) do
    canonical = Literal.canonical(literal)
    Literal.lexical(canonical)
  else
    typed_literal_term(literal, state, nesting)
  end
end

# All remaining literals use the typed literal syntax.
defp term(%Literal{} = literal, state, _position, nesting) do
  typed_literal_term(literal, state, nesting)
end
|
|
|
|
# Elixir lists are written as a Turtle collection: space-separated terms in parentheses.
defp term(list, state, _position, nesting) when is_list(list) do
  items = Enum.map_join(list, " ", &term(&1, state, :list, nesting))
  "(#{items})"
end
|
|
|
|
# Returns the IRI relative to the base as a Turtle IRI reference (`<...>`).
#
# Returns nil when the base is not an `{:ok, base}` tuple or when the IRI
# does not start with the base.
defp based_name(%IRI{} = iri, base), do: based_name(to_string(iri), base)

defp based_name(iri, {:ok, base}) do
  if String.starts_with?(iri, base) do
    # The prefix test above is byte-wise, so the prefix is also removed by
    # bytes. Counting graphemes (String.length/String.slice) could cut at the
    # wrong place when the character following the base combines with its
    # last character, and it relied on a negative-end range.
    base_size = byte_size(base)
    "<#{binary_part(iri, base_size, byte_size(iri) - base_size)}>"
  end
end

defp based_name(_, _), do: nil
|
|
|
|
|
|
# Renders a literal in typed literal syntax: the quoted lexical form followed
# by `^^` and the encoded datatype IRI.
#
# The lexical form goes through `quoted/1` so that quotes, backslashes and line
# breaks in it are escaped instead of producing an invalid document.
defp typed_literal_term(%Literal{datatype: datatype} = literal, state, nesting) do
  lexical = literal |> Literal.lexical() |> to_string()
  quoted(lexical) <> "^^" <> term(datatype, state, :datatype, nesting)
end
|
|
|
|
|
|
# Returns the prefixed name (`prefix:name`) for the IRI, or nil when the IRI
# can't be split into namespace and name or no prefix is registered for its
# namespace. `prefixes` maps namespace IRIs to prefix strings.
def prefixed_name(iri, prefixes) do
  with {ns, name} <- split_iri(iri),
       prefix when not is_nil(prefix) <- prefixes[ns] do
    prefix <> ":" <> name
  end
end
|
|
|
|
# Splits an IRI into `{namespace_iri, local_name}`.
#
# IRIs with a fragment are split at the fragment; otherwise the path is split
# after its last "/". Returns nil when there is neither a fragment nor a path.
defp split_iri(%IRI{} = iri) do
  iri |> IRI.parse() |> split_iri()
end

defp split_iri(%URI{fragment: fragment} = uri) when not is_nil(fragment) do
  {RDF.iri(%URI{uri | fragment: ""}), fragment}
end

defp split_iri(%URI{path: nil}), do: nil

defp split_iri(%URI{path: path} = uri) do
  [{pos, _length}] = Regex.run(~r"[^/]*$"u, path, return: :index)

  # `pos` is a byte offset, so the path has to be split by bytes. Splitting with
  # String.split_at/2 (which counts graphemes) cut at the wrong position as soon
  # as the path contained multi-byte characters before the last segment.
  <<ns_path::binary-size(pos), name::binary>> = path

  {RDF.iri(%URI{uri | path: ns_path}), name}
end
|
|
|
|
# Wraps a string in Turtle string literal syntax.
#
# Strings containing a line break are written as long strings (`"""..."""`)
# with the line breaks kept verbatim; all other strings are written as short
# strings (`"..."`). In both forms backslashes and double quotes are escaped so
# the content can never terminate the literal early or be read as an escape.
defp quoted(string) do
  if String.contains?(string, ["\n", "\r"]) do
    ~s["""#{escape_long_string(string)}"""]
  else
    ~s["#{escape(string)}"]
  end
end

# Escapes a string for use inside a short (single-line) string literal.
# The backslash has to be handled first so that the backslashes introduced
# by the following replacements are not escaped again.
defp escape(string) do
  string
  |> String.replace("\\", "\\\\")
  |> String.replace("\b", "\\b")
  |> String.replace("\f", "\\f")
  |> String.replace("\t", "\\t")
  |> String.replace("\n", "\\n")
  |> String.replace("\r", "\\r")
  |> String.replace("\"", ~S[\"])
end

# Escapes a string for use inside a long (triple-quoted) string literal.
# Only backslashes and double quotes need escaping there.
defp escape_long_string(string) do
  string
  |> String.replace("\\", "\\\\")
  |> String.replace("\"", ~S[\"])
end
|
|
|
|
|
|
# Returns a line break followed by `nesting` indentation characters.
defp newline_indent(nesting) do
  indentation = String.duplicate(@indentation_char, nesting)
  "\n" <> indentation
end
|
|
end
|