rdf-ex/lib/rdf/serializations/turtle_encoder.ex

474 lines
15 KiB
Elixir

defmodule RDF.Turtle.Encoder do
@moduledoc """
An encoder for Turtle serializations of RDF.ex data structures.
As for all encoders of `RDF.Serialization.Format`s, you normally won't use these
functions directly, but via one of the `write_` functions on the `RDF.Turtle`
format module or the generic `RDF.Serialization` module.
## Options
- `:prefixes`: Allows to specify the prefixes to be used as a `RDF.PrefixMap` or
anything from which a `RDF.PrefixMap` can be created with `RDF.PrefixMap.new/1`.
If not specified the ones from the given graph are used or if these are also not
present the `RDF.default_prefixes/0`.
- `:base`: Allows to specify the base URI to be used for a `@base` directive.
If not specified the one from the given graph is used or if there is also none
specified for the graph the `RDF.default_base_iri/0`.
- `:implicit_base`: This boolean flag allows to use a base URI to get relative IRIs
without embedding it explicitly in the content with a `@base` directive, so that
the URIs will be resolved according to the remaining strategy specified in
section 5.1 of [RFC3986](https://www.ietf.org/rfc/rfc3986.txt) (default: `false`).
- `:base_description`: Allows to provide a description of the resource denoted by
the base URI. This option is especially useful when the base URI is actually not
specified, e.g. in the common use case of wanting to describe the Turtle document
itself, which should be denoted by the URL where it is hosted as the implicit base
URI.
- `:only`: Allows to specify which parts of a Turtle document should be generated.
Possible values: `:base`, `:prefixes`, `:directives` (means the same as `[:base, :prefixes]`),
`:triples` or a list with any combination of these values.
- `:indent`: Allows to specify the number of spaces the output should be indented.
"""
use RDF.Serialization.Encoder
alias RDF.Turtle.Encoder.State
alias RDF.Turtle.Star.CompactGraph
alias RDF.{BlankNode, Description, Graph, IRI, XSD, Literal, LangString, PrefixMap}
import RDF.NTriples.Encoder, only: [escape_string: 1]
@document_structure [
:base,
:prefixes,
:triples
]
@indentation_char " "
@indentation 4
@native_supported_datatypes [
XSD.Boolean,
XSD.Integer,
XSD.Double,
XSD.Decimal
]
@rdf_type RDF.Utils.Bootstrapping.rdf_iri("type")
@rdf_nil RDF.Utils.Bootstrapping.rdf_iri("nil")
# Defines rdf:type of subjects to be serialized at the beginning of the encoded graph
@top_classes [RDF.Utils.Bootstrapping.rdfs_iri("Class")]
# Defines order of predicates at the beginning of a resource description
@predicate_order [
@rdf_type,
RDF.Utils.Bootstrapping.rdfs_iri("label"),
RDF.iri("http://purl.org/dc/terms/title")
]
@ordered_properties MapSet.new(@predicate_order)
@implicit_default_base "http://this-implicit-default-base-iri-should-never-appear-in-a-document"
@impl RDF.Serialization.Encoder
@spec encode(Graph.t() | Description.t(), keyword) :: {:ok, String.t()} | {:error, any}
def encode(data, opts \\ [])

# A description is encoded by wrapping it in a fresh graph first.
def encode(%Description{} = description, opts), do: description |> Graph.new() |> encode(opts)

def encode(%Graph{} = graph, opts) do
  # Resolve the base IRI: the :base option (:base_iri acts as an alias)
  # wins over the graph's own base, which wins over RDF.default_base_iri/0.
  base =
    Keyword.get(opts, :base, Keyword.get(opts, :base_iri))
    |> base_iri(graph)
    |> init_base_iri()

  # Prefixes from the :prefixes option, the graph, or RDF.default_prefixes/0.
  prefixes =
    Keyword.get(opts, :prefixes)
    |> prefixes(graph)

  # A :base_description is merged into the graph as a description of the
  # base IRI; when no base is set this also switches to an implicit one
  # (see add_base_description/4), so base and opts may change here.
  {graph, base, opts} =
    add_base_description(graph, base, Keyword.get(opts, :base_description), opts)

  # The encoder state is a separate process holding the preprocessed graph.
  {:ok, state} = State.start_link(graph, base, prefixes)

  try do
    State.preprocess(state)

    # :only restricts which document parts are emitted; default is the
    # full structure (base, prefixes, triples).
    {:ok,
     (Keyword.get(opts, :only) || @document_structure)
     |> compile(base, prefixes, state, opts)}
  after
    # Always stop the state process, even when compilation raises.
    State.stop(state)
  end
end
# Compiles one document-structure element (or a list of them) into its
# string serialization. Unknown elements raise.
defp compile(:base, base, _prefixes, _state, opts), do: base_directive(base, opts)
defp compile(:prefixes, _base, prefixes, _state, opts), do: prefix_directives(prefixes, opts)
defp compile(:triples, _base, _prefixes, state, opts), do: graph_statements(state, opts)

# :directives is shorthand for the base and prefix directives together.
defp compile(:directives, base, prefixes, state, opts),
  do: compile([:base, :prefixes], base, prefixes, state, opts)

defp compile(elements, base, prefixes, state, opts) when is_list(elements) do
  Enum.map_join(elements, "", &compile(&1, base, prefixes, state, opts))
end

defp compile(element, _base, _prefixes, _state, _opts) do
  raise "unknown Turtle document element: #{inspect(element)}"
end
# Determines the effective base IRI: an explicitly given one wins, then the
# graph's own base IRI, then the application-wide default.
defp base_iri(nil, graph) do
  case graph do
    %Graph{base_iri: graph_base} when not is_nil(graph_base) -> graph_base
    _ -> RDF.default_base_iri()
  end
end

defp base_iri(given_base, _graph), do: IRI.coerce_base(given_base)
# Normalizes the resolved base IRI to its string form (nil stays nil).
defp init_base_iri(base_iri) do
  case base_iri do
    nil -> nil
    iri -> to_string(iri)
  end
end
# Determines the effective prefix map: an explicitly given one wins, then
# the graph's own prefixes, then the application-wide defaults.
defp prefixes(nil, graph) do
  case graph do
    %Graph{prefixes: graph_prefixes} when not is_nil(graph_prefixes) -> graph_prefixes
    _ -> RDF.default_prefixes()
  end
end

defp prefixes(given_prefixes, _graph), do: PrefixMap.new(given_prefixes)
# Renders the base directive, or "" when there is no base or it is used
# only implicitly (:implicit_base) to produce relative IRIs.
defp base_directive(nil, _opts), do: ""

defp base_directive(base, opts) do
  cond do
    Keyword.get(opts, :implicit_base, false) ->
      ""

    Keyword.get(opts, :directive_style) == :sparql ->
      indent(opts) <> "BASE <#{base}>" <> "\n\n"

    true ->
      indent(opts) <> "@base <#{base}> ." <> "\n\n"
  end
end
# Renders a single prefix directive in Turtle (@prefix) or SPARQL (PREFIX)
# style, depending on the :directive_style option.
defp prefix_directive({prefix, namespace}, opts) do
  directive =
    case Keyword.get(opts, :directive_style) do
      :sparql -> "PREFIX #{prefix}: <#{to_string(namespace)}>\n"
      _ -> "@prefix #{prefix}: <#{to_string(namespace)}> .\n"
    end

  indent(opts) <> directive
end
# Renders all prefix directives, followed by a blank line when any exist.
defp prefix_directives(prefixes, opts) do
  if Enum.empty?(prefixes) do
    ""
  else
    Enum.map_join(prefixes, "", &prefix_directive(&1, opts)) <> "\n"
  end
end
# Merges the :base_description statements into the graph as a description
# of the base IRI. When no base is set, an implicit default base is
# introduced (and :implicit_base forced) so the statements still describe
# the document's own resource without emitting a @base directive.
defp add_base_description(graph, base, nil, opts), do: {graph, base, opts}

defp add_base_description(graph, nil, base_description, opts) do
  add_base_description(
    graph,
    @implicit_default_base,
    base_description,
    Keyword.put(opts, :implicit_base, true)
  )
end

defp add_base_description(graph, base, base_description, opts) do
  description_of_base = Description.new(base, init: base_description)
  {Graph.add(graph, description_of_base), base, opts}
end
# Serializes all descriptions of the (compacted) graph, one statement
# block per subject, separated by blank lines.
defp graph_statements(state, opts) do
  line_prefix = indent(opts)
  base_nesting = Keyword.get(opts, :indent, 0)

  state
  |> State.data()
  |> CompactGraph.compact()
  |> RDF.Data.descriptions()
  |> order_descriptions(state)
  |> Stream.map(&description_statements(&1, state, base_nesting))
  |> Stream.reject(&is_nil/1)
  |> Enum.map_join("\n", &(line_prefix <> &1))
end
# Orders descriptions for output: the base resource first, then instances
# of the @top_classes, then everything else (each group sorted).
defp order_descriptions(descriptions, state) do
  groups = Enum.group_by(descriptions, &description_group(&1, State.base_iri(state)))

  top_class_descriptions =
    Enum.flat_map(@top_classes, fn class ->
      case groups[class] do
        nil -> []
        members -> sort_descriptions(members)
      end
    end)

  rest = groups |> Map.get(:other, []) |> sort_descriptions()

  case groups[:base] do
    [base_description] -> [base_description | top_class_descriptions ++ rest]
    _ -> top_class_descriptions ++ rest
  end
end
# Classifies a description for ordering: :base for the base resource
# itself, one of the @top_classes it is an instance of, or :other.
defp description_group(%{subject: base_iri}, base_iri), do: :base

defp description_group(description, _base_iri) do
  case description.predications[@rdf_type] do
    nil -> :other
    types -> Enum.find(@top_classes, :other, &Map.has_key?(types, &1))
  end
end
# Sorts descriptions deterministically using description_order/2.
defp sort_descriptions(descriptions), do: Enum.sort(descriptions, &description_order/2)

# Ordering relation between two descriptions by their subject; returns true
# when the first should come before the second. Clause order is significant.

# IRI subjects sort before blank node subjects.
defp description_order(%{subject: %IRI{}}, %{subject: %BlankNode{}}), do: true
defp description_order(%{subject: %BlankNode{}}, %{subject: %IRI{}}), do: false

# Quoted-triple subjects (RDF-star) sharing subject and predicate:
# compare by object.
defp description_order(%{subject: {s, p, o1}}, %{subject: {s, p, o2}}),
  do: to_string(o1) < to_string(o2)

# Quoted-triple subjects sharing the subject: compare by predicate.
defp description_order(%{subject: {s, p1, _}}, %{subject: {s, p2, _}}),
  do: to_string(p1) < to_string(p2)

# Quoted-triple subjects with different subjects: compare by subject.
defp description_order(%{subject: {s1, _, _}}, %{subject: {s2, _, _}}),
  do: to_string(s1) < to_string(s2)

# A quoted-triple subject sorts after any non-triple subject.
defp description_order(%{subject: {_, _, _}}, %{subject: _}), do: false
defp description_order(%{subject: _}, %{subject: {_, _, _}}), do: true

# Fallback: lexicographic order on the subject's string form.
defp description_order(%{subject: s1}, %{subject: s2}), do: to_string(s1) < to_string(s2)
# Serializes one description. Blank node subjects referenced fewer than two
# times get special treatment (inlined or emitted as a property list /
# collection); everything else becomes a regular statement block.
defp description_statements(description, state, nesting) do
  subject = description.subject

  case subject do
    %BlankNode{} ->
      ref_count = State.bnode_ref_counter(state, subject)

      if ref_count < 2 do
        unrefed_bnode_subject_term(description, ref_count, state, nesting)
      else
        full_description_statements(description, state, nesting)
      end

    _ ->
      full_description_statements(description, state, nesting)
  end
end
# Renders a statement block for an already serialized subject term.
defp full_description_statements(subject_string, description, state, nesting) do
  child_nesting = nesting + @indentation

  subject_string <>
    newline_indent(child_nesting) <>
    predications(description, state, child_nesting) <> " .\n"
end

# Renders a statement block, serializing the subject first.
defp full_description_statements(description, state, nesting) do
  description.subject
  |> term(state, :subject, nesting)
  |> full_description_statements(description, state, nesting)
end
# Renders a description as an anonymous blank node property list `[ ... ]`;
# an empty description becomes just "[]".
defp blank_node_property_list(description, state, nesting) do
  if Enum.empty?(description) do
    "[]"
  else
    child_nesting = nesting + @indentation

    "[" <>
      newline_indent(child_nesting) <>
      predications(description, state, child_nesting) <>
      newline_indent(nesting) <> "]"
  end
end
# Renders all predications of a description, separated by " ;" + newline.
defp predications(description, state, nesting) do
  separator = " ;" <> newline_indent(nesting)

  description.predications
  |> order_predications()
  |> Enum.map_join(separator, &predication(&1, state, nesting))
end
@dialyzer {:nowarn_function, order_predications: 1}
# Moves the well-known @predicate_order properties (rdf:type, rdfs:label,
# dct:title) to the front; the remaining predications keep their order.
defp order_predications(predications) do
  # `objects = predications[predicate]` also acts as a filter: a nil
  # (absent predicate) is filtered out of the comprehension.
  prioritized =
    for predicate <- @predicate_order,
        objects = predications[predicate],
        do: {predicate, objects}

  remaining =
    Enum.reject(predications, fn {predicate, _objects} ->
      MapSet.member?(@ordered_properties, predicate)
    end)

  prioritized ++ remaining
end
# Renders a single predicate together with its object list.
defp predication({predicate, objects}, state, nesting) do
  "#{term(predicate, state, :predicate, nesting)} #{objects(objects, state, nesting)}"
end
# Renders the object list of a predication, including RDF-star annotations
# (`{| ... |}`). When any object carries an annotation, the objects are
# separated by line breaks instead of ", ".
defp objects(objects, state, nesting) do
  {rendered_objects, annotated?} =
    Enum.map_reduce(objects, false, fn {object, annotation}, annotated? ->
      object_string = term(object, state, :object, nesting)

      if annotation do
        annotation_string = predications(annotation, state, nesting + 2 * @indentation)
        {object_string <> " {| #{annotation_string} |}", true}
      else
        {object_string, annotated?}
      end
    end)

  # TODO: split if the line gets too long
  separator =
    if annotated?,
      do: "," <> newline_indent(nesting + @indentation),
      else: ", "

  Enum.join(rendered_objects, separator)
end
# Serializes a description whose subject is a blank node referenced at most
# once. A single reference (ref_count 1) means the node will be inlined at
# its referencing object position, so nothing is emitted here. Unreferenced
# nodes (ref_count 0) become a top-level collection or property list
# statement. Any other count indicates a bug in the preprocessing.
defp unrefed_bnode_subject_term(bnode_description, ref_count, state, nesting) do
  case {ref_count, valid_list_node?(bnode_description.subject, state)} do
    {1, _} ->
      nil

    {0, true} ->
      bnode_description.subject
      |> list_term(state, nesting)
      |> full_description_statements(
        list_subject_description(bnode_description),
        state,
        nesting
      )

    {0, false} ->
      blank_node_property_list(bnode_description, state, nesting) <> " .\n"

    _ ->
      raise "Internal error: This shouldn't happen. Please raise an issue in the RDF.ex project with the input document causing this error."
  end
end
@dialyzer {:nowarn_function, list_subject_description: 1}
# Strips the rdf:first/rdf:rest statements from a list node's description.
defp list_subject_description(description) do
  remaining = Description.delete_predicates(description, [RDF.first(), RDF.rest()])

  if Enum.empty?(remaining.predications) do
    # since the Turtle grammar doesn't allow bare lists, we add a statement
    RDF.type(remaining, RDF.List)
  else
    remaining
  end
end
# Serializes a blank node in object position that is referenced at most
# once: as a Turtle collection when it is a valid list node, otherwise as
# an inlined property list. Any other ref count indicates a bug upstream.
defp unrefed_bnode_object_term(bnode, ref_count, state, nesting) do
  cond do
    valid_list_node?(bnode, state) ->
      list_term(bnode, state, nesting)

    ref_count == 1 ->
      state
      |> State.data()
      |> RDF.Data.description(bnode)
      |> blank_node_property_list(state, nesting)

    true ->
      raise "Internal error: This shouldn't happen. Please raise an issue in the RDF.ex project with the input document causing this error."
  end
end
# Whether the given blank node was identified as the head or a member of a
# well-formed list during preprocessing.
defp valid_list_node?(bnode, state) do
  state
  |> State.list_nodes()
  |> MapSet.member?(bnode)
end
# Serializes a list head node as a Turtle collection of its values.
defp list_term(head, state, nesting) do
  term(State.list_values(head, state), state, :list, nesting)
end
# Serializes a single RDF term to its Turtle string form. The third
# argument is the syntactic position (:subject, :predicate, :object,
# :list or :datatype), which influences how blank nodes and rdf:type are
# rendered. Clause order is significant.

# rdf:type in predicate position is abbreviated as "a".
defp term(@rdf_type, _, :predicate, _), do: "a"

# rdf:nil is the empty collection.
defp term(@rdf_nil, _, _, _), do: "()"

# IRIs: try a base-relative form, then a prefixed name, then a full IRI ref.
defp term(%IRI{} = iri, state, _, _) do
  based_name(iri, State.base(state)) ||
    prefixed_name(iri, State.prefixes(state)) ||
    "<#{to_string(iri)}>"
end

# Blank nodes in object or list position with at most one reference are
# inlined (as a property list or collection); otherwise a labeled node.
defp term(%BlankNode{} = bnode, state, position, nesting)
     when position in ~w[object list]a do
  if (ref_count = State.bnode_ref_counter(state, bnode)) <= 1 do
    unrefed_bnode_object_term(bnode, ref_count, state, nesting)
  else
    to_string(bnode)
  end
end

# Blank nodes in any other position keep their label.
defp term(%BlankNode{} = bnode, _, _, _),
  do: to_string(bnode)

# Language-tagged strings: quoted value with an @lang suffix.
defp term(%Literal{literal: %LangString{} = lang_string}, _, _, _) do
  quoted(lang_string.value) <> "@" <> lang_string.language
end

# Plain xsd:string literals need no datatype suffix.
defp term(%Literal{literal: %XSD.String{}} = literal, _, _, _) do
  literal |> Literal.lexical() |> quoted()
end

# Datatypes with native Turtle syntax (boolean, integer, double, decimal)
# are written bare when valid; invalid ones fall back to the typed form.
defp term(%Literal{literal: %datatype{}} = literal, state, _, nesting)
     when datatype in @native_supported_datatypes do
  if Literal.valid?(literal) do
    Literal.canonical_lexical(literal)
  else
    typed_literal_term(literal, state, nesting)
  end
end

# All other literals are written as "lexical"^^datatype.
defp term(%Literal{} = literal, state, _, nesting),
  do: typed_literal_term(literal, state, nesting)

# RDF-star quoted triples.
defp term({s, p, o}, state, _, nesting) do
  "<< #{term(s, state, :subject, nesting)} #{term(p, state, :predicate, nesting)} #{term(o, state, :object, nesting)} >>"
end

# Turtle collections.
defp term(list, state, _, nesting) when is_list(list) do
  "(" <>
    (list
     |> Enum.map(&term(&1, state, :list, nesting))
     |> Enum.join(" ")) <>
    ")"
end
# Returns the base-relative form `<suffix>` of `iri` when it starts with
# `base`, otherwise nil (no base configured or no common prefix).
defp based_name(%IRI{} = iri, base), do: based_name(to_string(iri), base)
defp based_name(_, nil), do: nil

defp based_name(iri, base) do
  if String.starts_with?(iri, base) do
    # Slice off the prefix byte-wise: String.starts_with?/2 compares bytes,
    # while String.length/1 + String.slice/2 count graphemes, which can
    # diverge for base IRIs containing non-ASCII characters. This also
    # avoids the deprecated `..-1` range without an explicit step.
    "<#{binary_part(iri, byte_size(base), byte_size(iri) - byte_size(base))}>"
  end
end
# Renders a literal in its generic typed form: "lexical"^^datatype.
defp typed_literal_term(%Literal{} = literal, state, nesting) do
  datatype_string = literal |> Literal.datatype_id() |> term(state, :datatype, nesting)
  ~s["#{Literal.lexical(literal)}"^^] <> datatype_string
end
# Returns the prefixed-name form of `iri` under `prefixes`, or nil when no
# prefix matches or the local part is not usable as a prefixed name.
def prefixed_name(iri, prefixes) do
  with {prefix, name} <- PrefixMap.prefix_name_pair(prefixes, iri),
       true <- valid_pn_local?(name) do
    prefix <> ":" <> name
  else
    _ -> nil
  end
end
# Whether `name` can be used as the local part of a prefixed name.
# NOTE(review): this looks like a conservative approximation of the Turtle
# PN_LOCAL production (e.g. it rejects `.` and `-`) — confirm against the
# grammar before tightening or loosening it.
defp valid_pn_local?(name) do
  name =~ ~r/^([[:alpha:]]|[[:digit:]]|_|:)*$/u
end
# Quotes a literal's lexical value for Turtle output. Values containing
# line breaks use the long (triple-quoted) string form, everything else the
# single-line form with the usual escaping.
defp quoted(string) do
  if String.contains?(string, ["\n", "\r"]) do
    # The long string form must still escape backslashes and quotes:
    # previously the raw value was interpolated, so content containing
    # `"""` (or ending in `"`, or containing `\`) terminated the literal
    # prematurely and produced invalid Turtle. Escaped quotes are valid
    # inside STRING_LITERAL_LONG_QUOTE per the Turtle grammar.
    escaped =
      string
      |> String.replace("\\", "\\\\")
      |> String.replace("\"", "\\\"")

    ~s["""#{escaped}"""]
  else
    ~s["#{escape_string(string)}"]
  end
end
# A line break followed by `nesting` indentation characters.
defp newline_indent(nesting) do
  "\n" <> String.duplicate(@indentation_char, nesting)
end
# Returns the leading indentation derived from the :indent option
# (a number of characters; nil or absent means no indentation).
defp indent(opts) when is_list(opts), do: opts |> Keyword.get(:indent) |> indent()
defp indent(nil), do: ""
# Use the shared @indentation_char instead of a hard-coded " " so the
# indentation character is defined in exactly one place.
defp indent(count), do: String.duplicate(@indentation_char, count)
end