defmodule RDF.Turtle.Encoder do @moduledoc """ An encoder for Turtle serializations of RDF.ex data structures. As for all encoders of `RDF.Serialization.Format`s, you normally won't use these functions directly, but via one of the `write_` functions on the `RDF.Turtle` format module or the generic `RDF.Serialization` module. ## Options - `:prefixes`: Allows to specify the prefixes to be used as a `RDF.PrefixMap` or anything from which a `RDF.PrefixMap` can be created with `RDF.PrefixMap.new/1`. If not specified the ones from the given graph are used or if these are also not present the `RDF.default_prefixes/0`. - `:base`: : Allows to specify the base URI to be used for a `@base` directive. If not specified the one from the given graph is used or if there is also none specified for the graph the `RDF.default_base_iri/0`. - `:implicit_base`: This boolean flag allows to use a base URI to get relative IRIs without embedding it explicitly in the content with a `@base` directive, so that the URIs will be resolved according to the remaining strategy specified in section 5.1 of [RFC3986](https://www.ietf.org/rfc/rfc3986.txt) (default: `false`). - `:base_description`: Allows to provide a description of the resource denoted by the base URI. This option is especially useful when the base URI is actually not specified, eg. in the common use case of wanting to describe the Turtle document itself, which should be denoted by the URL where it is hosted as the implicit base URI. - `:only`: Allows to specify which parts of a Turtle document should be generated. Possible values: `:base`, `:prefixes`, `:directives` (means the same as `[:base, :prefixes]`), `:triples` or a list with any combination of these values. - `:indent`: Allows to specify the number of spaces the output should be indented. """ use RDF.Serialization.Encoder alias RDF.Turtle.Encoder.State alias RDF.Turtle.Star.CompactGraph alias RDF.{BlankNode, Description, Graph, IRI, XSD, Literal, LangString, PrefixMap} import RDF.NTriples.Encoder, only: [escape_string: 1] @document_structure [ :base, :prefixes, :triples ] @indentation_char " " @indentation 4 @native_supported_datatypes [ XSD.Boolean, XSD.Integer, XSD.Double, XSD.Decimal ] @rdf_type RDF.Utils.Bootstrapping.rdf_iri("type") @rdf_nil RDF.Utils.Bootstrapping.rdf_iri("nil") # Defines rdf:type of subjects to be serialized at the beginning of the encoded graph @top_classes [RDF.Utils.Bootstrapping.rdfs_iri("Class")] # Defines order of predicates at the beginning of a resource description @predicate_order [ @rdf_type, RDF.Utils.Bootstrapping.rdfs_iri("label"), RDF.iri("http://purl.org/dc/terms/title") ] @ordered_properties MapSet.new(@predicate_order) @implicit_default_base "http://this-implicit-default-base-iri-should-never-appear-in-a-document" @impl RDF.Serialization.Encoder @spec encode(Graph.t() | Description.t(), keyword) :: {:ok, String.t()} | {:error, any} def encode(data, opts \\ []) def encode(%Description{} = description, opts), do: description |> Graph.new() |> encode(opts) def encode(%Graph{} = graph, opts) do base = Keyword.get(opts, :base, Keyword.get(opts, :base_iri)) |> base_iri(graph) |> init_base_iri() prefixes = Keyword.get(opts, :prefixes) |> prefixes(graph) {graph, base, opts} = add_base_description(graph, base, Keyword.get(opts, :base_description), opts) {:ok, state} = State.start_link(graph, base, prefixes) try do State.preprocess(state) {:ok, (Keyword.get(opts, :only) || @document_structure) |> compile(base, prefixes, state, opts)} after State.stop(state) end end defp compile(:base, base, _, _, opts), do: base_directive(base, opts) defp compile(:prefixes, _, prefixes, _, opts), do: prefix_directives(prefixes, opts) defp compile(:triples, _, _, state, opts), do: graph_statements(state, opts) defp compile(:directives, base, prefixes, state, opts), do: [:base, :prefixes] |> compile(base, prefixes, state, opts) defp compile(elements, base, prefixes, state, opts) when is_list(elements) do Enum.map_join(elements, &compile(&1, base, prefixes, state, opts)) end defp compile(element, _, _, _, _) do raise "unknown Turtle document element: #{inspect(element)}" end defp base_iri(nil, %Graph{base_iri: base_iri}) when not is_nil(base_iri), do: base_iri defp base_iri(nil, _), do: RDF.default_base_iri() defp base_iri(base_iri, _), do: IRI.coerce_base(base_iri) defp init_base_iri(nil), do: nil defp init_base_iri(base_iri), do: to_string(base_iri) defp prefixes(nil, %Graph{prefixes: prefixes}) when not is_nil(prefixes), do: prefixes defp prefixes(nil, _), do: RDF.default_prefixes() defp prefixes(prefixes, _), do: PrefixMap.new(prefixes) defp base_directive(nil, _), do: "" defp base_directive(base, opts) do if Keyword.get(opts, :implicit_base, false) do "" else indent(opts) <> case Keyword.get(opts, :directive_style) do :sparql -> "BASE <#{base}>" _ -> "@base <#{base}> ." end <> "\n\n" end end defp prefix_directive({prefix, ns}, opts) do indent(opts) <> case Keyword.get(opts, :directive_style) do :sparql -> "PREFIX #{prefix}: <#{to_string(ns)}>\n" _ -> "@prefix #{prefix}: <#{to_string(ns)}> .\n" end end defp prefix_directives(prefixes, opts) do case Enum.map(prefixes, &prefix_directive(&1, opts)) do [] -> "" prefixes -> Enum.join(prefixes, "") <> "\n" end end defp add_base_description(graph, base, nil, opts), do: {graph, base, opts} defp add_base_description(graph, nil, base_description, opts) do add_base_description( graph, @implicit_default_base, base_description, Keyword.put(opts, :implicit_base, true) ) end defp add_base_description(graph, base, base_description, opts) do {Graph.add(graph, Description.new(base, init: base_description)), base, opts} end defp graph_statements(state, opts) do indent = indent(opts) State.data(state) |> CompactGraph.compact() |> RDF.Data.descriptions() |> order_descriptions(state) |> Enum.map(&description_statements(&1, state, Keyword.get(opts, :indent, 0))) |> Enum.reject(&is_nil/1) |> Enum.map_join("\n", &(indent <> &1)) end defp order_descriptions(descriptions, state) do base_iri = State.base_iri(state) group = Enum.group_by(descriptions, &description_group(&1, base_iri)) ordered_descriptions = (@top_classes |> Stream.map(&group[&1]) |> Stream.reject(&is_nil/1) |> Enum.flat_map(&sort_descriptions/1)) ++ (group |> Map.get(:other, []) |> sort_descriptions()) case group[:base] do [base] -> [base | ordered_descriptions] _ -> ordered_descriptions end end defp description_group(%{subject: base_iri}, base_iri), do: :base defp description_group(description, _) do if types = description.predications[@rdf_type] do Enum.find(@top_classes, :other, &Map.has_key?(types, &1)) else :other end end defp sort_descriptions(descriptions), do: Enum.sort(descriptions, &description_order/2) defp description_order(%{subject: %IRI{}}, %{subject: %BlankNode{}}), do: true defp description_order(%{subject: %BlankNode{}}, %{subject: %IRI{}}), do: false defp description_order(%{subject: {s, p, o1}}, %{subject: {s, p, o2}}), do: to_string(o1) < to_string(o2) defp description_order(%{subject: {s, p1, _}}, %{subject: {s, p2, _}}), do: to_string(p1) < to_string(p2) defp description_order(%{subject: {s1, _, _}}, %{subject: {s2, _, _}}), do: to_string(s1) < to_string(s2) defp description_order(%{subject: {_, _, _}}, %{subject: _}), do: false defp description_order(%{subject: _}, %{subject: {_, _, _}}), do: true defp description_order(%{subject: s1}, %{subject: s2}), do: to_string(s1) < to_string(s2) defp description_statements(description, state, nesting) do if Description.empty?(description) do raise Graph.EmptyDescriptionError, subject: description.subject else with %BlankNode{} <- description.subject, ref_count when ref_count < 2 <- State.bnode_ref_counter(state, description.subject) do unrefed_bnode_subject_term(description, ref_count, state, nesting) else _ -> full_description_statements(description, state, nesting) end end end defp full_description_statements(subject, description, state, nesting) do nesting = nesting + @indentation subject <> newline_indent(nesting) <> predications(description, state, nesting) <> " .\n" end defp full_description_statements(description, state, nesting) do term(description.subject, state, :subject, nesting) |> full_description_statements(description, state, nesting) end defp blank_node_property_list(description, state, nesting) do indented = nesting + @indentation if Description.empty?(description) do "[]" else "[" <> newline_indent(indented) <> predications(description, state, indented) <> newline_indent(nesting) <> "]" end end defp predications(description, state, nesting) do description.predications |> order_predications() |> Enum.map(&predication(&1, state, nesting)) |> Enum.join(" ;" <> newline_indent(nesting)) end @dialyzer {:nowarn_function, order_predications: 1} defp order_predications(predications) do sorted_predications = @predicate_order |> Enum.map(fn predicate -> {predicate, predications[predicate]} end) |> Enum.reject(fn {_, objects} -> is_nil(objects) end) unsorted_predications = Enum.reject(predications, fn {predicate, _} -> MapSet.member?(@ordered_properties, predicate) end) sorted_predications ++ unsorted_predications end defp predication({predicate, objects}, state, nesting) do term(predicate, state, :predicate, nesting) <> " " <> objects(objects, state, nesting) end defp objects(objects, state, nesting) do {objects, with_annotations} = Enum.map_reduce(objects, false, fn {object, annotation}, with_annotations -> if annotation do { term(object, state, :object, nesting) <> " {| #{predications(annotation, state, nesting + 2 * @indentation)} |}", true } else {term(object, state, :object, nesting), with_annotations} end end) # TODO: split if the line gets too long separator = if with_annotations, do: "," <> newline_indent(nesting + @indentation), else: ", " Enum.join(objects, separator) end defp unrefed_bnode_subject_term(bnode_description, ref_count, state, nesting) do if valid_list_node?(bnode_description.subject, state) do case ref_count do 0 -> bnode_description.subject |> list_term(state, nesting) |> full_description_statements( list_subject_description(bnode_description), state, nesting ) 1 -> nil _ -> raise "Internal error: This shouldn't happen. Please raise an issue in the RDF.ex project with the input document causing this error." end else case ref_count do 0 -> blank_node_property_list(bnode_description, state, nesting) <> " .\n" 1 -> nil _ -> raise "Internal error: This shouldn't happen. Please raise an issue in the RDF.ex project with the input document causing this error." end end end @dialyzer {:nowarn_function, list_subject_description: 1} defp list_subject_description(description) do description = Description.delete_predicates(description, [RDF.first(), RDF.rest()]) if Description.empty?(description) do # since the Turtle grammar doesn't allow bare lists, we add a statement description |> RDF.type(RDF.List) else description end end defp unrefed_bnode_object_term(bnode, ref_count, state, nesting) do if valid_list_node?(bnode, state) do list_term(bnode, state, nesting) else if ref_count == 1 do State.data(state) |> RDF.Data.description(bnode) |> blank_node_property_list(state, nesting) else raise "Internal error: This shouldn't happen. Please raise an issue in the RDF.ex project with the input document causing this error." end end end defp valid_list_node?(bnode, state) do MapSet.member?(State.list_nodes(state), bnode) end defp list_term(head, state, nesting) do head |> State.list_values(state) |> term(state, :list, nesting) end defp term(@rdf_type, _, :predicate, _), do: "a" defp term(@rdf_nil, _, _, _), do: "()" defp term(%IRI{} = iri, state, _, _) do based_name(iri, State.base(state)) || prefixed_name(iri, State.prefixes(state)) || "<#{to_string(iri)}>" end defp term(%BlankNode{} = bnode, state, position, nesting) when position in ~w[object list]a do if (ref_count = State.bnode_ref_counter(state, bnode)) <= 1 do unrefed_bnode_object_term(bnode, ref_count, state, nesting) else to_string(bnode) end end defp term(%BlankNode{} = bnode, _, _, _), do: to_string(bnode) defp term(%Literal{literal: %LangString{} = lang_string}, _, _, _) do quoted(lang_string.value) <> "@" <> lang_string.language end defp term(%Literal{literal: %XSD.String{}} = literal, _, _, _) do literal |> Literal.lexical() |> quoted() end defp term(%Literal{literal: %datatype{}} = literal, state, _, nesting) when datatype in @native_supported_datatypes do if Literal.valid?(literal) do Literal.canonical_lexical(literal) else typed_literal_term(literal, state, nesting) end end defp term(%Literal{} = literal, state, _, nesting), do: typed_literal_term(literal, state, nesting) defp term({s, p, o}, state, _, nesting) do "<< #{term(s, state, :subject, nesting)} #{term(p, state, :predicate, nesting)} #{term(o, state, :object, nesting)} >>" end defp term(list, state, _, nesting) when is_list(list) do "(" <> (list |> Enum.map(&term(&1, state, :list, nesting)) |> Enum.join(" ")) <> ")" end defp based_name(%IRI{} = iri, base), do: based_name(to_string(iri), base) defp based_name(_, nil), do: nil defp based_name(iri, base) do if String.starts_with?(iri, base) do "<#{String.slice(iri, String.length(base)..-1)}>" end end defp typed_literal_term(%Literal{} = literal, state, nesting) do ~s["#{Literal.lexical(literal)}"^^#{literal |> Literal.datatype_id() |> term(state, :datatype, nesting)}] end def prefixed_name(iri, prefixes) do case PrefixMap.prefix_name_pair(prefixes, iri) do {prefix, name} -> if valid_pn_local?(name), do: prefix <> ":" <> name _ -> nil end end defp valid_pn_local?(name) do String.match?(name, ~r/^([[:alpha:]]|[[:digit:]]|_|:)*$/u) end defp quoted(string) do if String.contains?(string, ["\n", "\r"]) do ~s["""#{string}"""] else ~s["#{escape_string(string)}"] end end defp newline_indent(nesting), do: "\n" <> String.duplicate(@indentation_char, nesting) defp indent(opts) when is_list(opts), do: opts |> Keyword.get(:indent) |> indent() defp indent(nil), do: "" defp indent(count), do: String.duplicate(" ", count) end