Add base URI handling to Turtle decoder

This commit is contained in:
Marcel Otto 2017-07-07 18:00:09 +02:00
parent a0b18dcfa3
commit cbc2b0f122
5 changed files with 113 additions and 11 deletions

View file

@ -6,6 +6,7 @@ defmodule RDF.Serialization.ParseHelper do
@rdf_type RDF.uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type") @rdf_type RDF.uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
def rdf_type, do: @rdf_type def rdf_type, do: @rdf_type
def to_uri_string({:iriref, line, value}), do: value def to_uri_string({:iriref, line, value}), do: value
def to_uri({:iriref, line, value}) do def to_uri({:iriref, line, value}) do
@ -15,6 +16,14 @@ defmodule RDF.Serialization.ParseHelper do
end end
end end
@doc """
Converts an `:iriref` token into an absolute URI or a tagged relative reference.

The token value is parsed with `URI.parse/1`. When the parsed result has a scheme,
the `%URI{}` struct is returned. Otherwise `{:relative_uri, value}` is returned with
the original, unparsed value, leaving the resolution to the caller.
"""
def to_absolute_or_relative_uri({:iriref, _line, value}) do
  case URI.parse(value) do
    # No scheme means the reference is relative; hand back the raw value tagged as such.
    %URI{scheme: nil} -> {:relative_uri, value}
    uri -> uri
  end
end
def to_bnode({:blank_node_label, _line, value}), do: RDF.bnode(value) def to_bnode({:blank_node_label, _line, value}), do: RDF.bnode(value)
def to_bnode({:anon, _line}), do: RDF.bnode # TODO: def to_bnode({:anon, _line}), do: RDF.bnode # TODO:

View file

@ -20,10 +20,16 @@ defmodule RDF.Turtle.Decoder do
end end
end end
def decode(content, _opts \\ []) do def decode(content, opts \\ %{})
def decode(content, opts) when is_list(opts),
do: decode(content, Map.new(opts))
def decode(content, opts) do
with {:ok, tokens, _} <- tokenize(content), with {:ok, tokens, _} <- tokenize(content),
{:ok, ast} <- parse(tokens) do {:ok, ast} <- parse(tokens),
{:ok, build_graph(ast)} base = Map.get(opts, :base) do
{:ok, build_graph(ast, base && RDF.uri(base))}
else else
{:error, {error_line, :turtle_lexer, error_descriptor}, _error_line_again} -> {:error, {error_line, :turtle_lexer, error_descriptor}, _error_line_again} ->
{:error, "Turtle scanner error on line #{error_line}: #{inspect error_descriptor}"} {:error, "Turtle scanner error on line #{error_line}: #{inspect error_descriptor}"}
@ -37,9 +43,9 @@ defmodule RDF.Turtle.Decoder do
defp parse([]), do: {:ok, []} defp parse([]), do: {:ok, []}
defp parse(tokens), do: tokens |> :turtle_parser.parse defp parse(tokens), do: tokens |> :turtle_parser.parse
defp build_graph(ast) do defp build_graph(ast, base) do
{graph, _} = {graph, _} =
Enum.reduce ast, {RDF.Graph.new, %State{}}, fn Enum.reduce ast, {RDF.Graph.new, %State{base_uri: base}}, fn
{:triples, triples_ast}, {graph, state} -> {:triples, triples_ast}, {graph, state} ->
with {statements, state} = triples(triples_ast, state) do with {statements, state} = triples(triples_ast, state) do
{RDF.Graph.add(graph, statements), state} {RDF.Graph.add(graph, statements), state}
@ -57,7 +63,7 @@ defmodule RDF.Turtle.Decoder do
end end
defp directive({:base, uri}, state) do defp directive({:base, uri}, state) do
%State{state | base_uri: uri} %State{state | base_uri: RDF.uri(uri)}
end end
@ -85,6 +91,14 @@ defmodule RDF.Turtle.Decoder do
{RDF.uri(State.ns(state, prefix) <> name), statements, state} {RDF.uri(State.ns(state, prefix) <> name), statements, state}
end end
# A relative IRI cannot be resolved while the state holds no base URI (neither a
# base directive nor the `:base` option has set one), so decoding is aborted.
defp resolve_node({:relative_uri, relative_uri}, _, %State{base_uri: nil}) do
  raise "Could not resolve relative IRI '#{relative_uri}', no base uri provided"
end

# Resolves a relative IRI against the base URI currently held in the state;
# the statements and the state are passed through unchanged.
defp resolve_node({:relative_uri, relative_uri}, statements, state) do
  {RDF.URI.Helper.absolute_iri(relative_uri, state.base_uri), statements, state}
end
defp resolve_node({:anon}, statements, state) do defp resolve_node({:anon}, statements, state) do
with {node, state} = State.next_bnode(state) do with {node, state} = State.next_bnode(state) do
{node, statements, state} {node, statements, state}

29
lib/rdf/uri_helper.ex Normal file
View file

@ -0,0 +1,29 @@
defmodule RDF.URI.Helper do
  @moduledoc """
  Helper functions for working with URIs.

  These functions should become part of a dedicated RDF.IRI implementation.
  """

  @doc """
  Resolves a relative IRI against a base IRI.

  The resolution is done as specified in
  [section 5.1 Establishing a Base URI of RFC3986](http://tools.ietf.org/html/rfc3986#section-5.1).
  Only the basic algorithm in [section 5.2 of RFC3986](http://tools.ietf.org/html/rfc3986#section-5.2)
  is used; neither Syntax-Based Normalization nor Scheme-Based Normalization are performed.

  Characters additionally allowed in IRI references are treated in the same way that
  unreserved characters are treated in URI references, per
  [section 6.5 of RFC3987](http://tools.ietf.org/html/rfc3987#section-6.5).

  Returns the parsed `value` when it already has a scheme, `nil` when `value` is
  relative and `base_iri` is `nil`, and the result of `URI.merge/2` otherwise.
  """
  def absolute_iri(value, base_iri) do
    reference = URI.parse(value)

    cond do
      # Already absolute: nothing to resolve.
      not is_nil(reference.scheme) -> reference
      # Relative, but there is nothing to resolve it against.
      is_nil(base_iri) -> nil
      true -> URI.merge(base_iri, value)
    end
  end
end

View file

@ -84,11 +84,7 @@ blankNode -> anon : {anon} .
Erlang code. Erlang code.
to_uri_string(IRIREF) -> 'Elixir.RDF.Serialization.ParseHelper':to_uri_string(IRIREF) . to_uri_string(IRIREF) -> 'Elixir.RDF.Serialization.ParseHelper':to_uri_string(IRIREF) .
to_uri(IRIREF) -> to_uri(IRIREF) -> 'Elixir.RDF.Serialization.ParseHelper':to_absolute_or_relative_uri(IRIREF) .
case 'Elixir.RDF.Serialization.ParseHelper':to_uri(IRIREF) of
{ok, URI} -> URI;
{error, ErrorLine, Message} -> return_error(ErrorLine, Message)
end.
to_bnode(BLANK_NODE) -> 'Elixir.RDF.Serialization.ParseHelper':to_bnode(BLANK_NODE). to_bnode(BLANK_NODE) -> 'Elixir.RDF.Serialization.ParseHelper':to_bnode(BLANK_NODE).
to_literal(STRING_LITERAL_QUOTE) -> 'Elixir.RDF.Serialization.ParseHelper':to_literal(STRING_LITERAL_QUOTE). to_literal(STRING_LITERAL_QUOTE) -> 'Elixir.RDF.Serialization.ParseHelper':to_literal(STRING_LITERAL_QUOTE).
to_literal(STRING_LITERAL_QUOTE, Type) -> 'Elixir.RDF.Serialization.ParseHelper':to_literal(STRING_LITERAL_QUOTE, Type). to_literal(STRING_LITERAL_QUOTE, Type) -> 'Elixir.RDF.Serialization.ParseHelper':to_literal(STRING_LITERAL_QUOTE, Type).

View file

@ -312,4 +312,58 @@ defmodule RDF.Turtle.DecoderTest do
end end
end end
describe "relative IRIs" do
  # Without any base, a relative IRI cannot be resolved and decoding must raise.
  test "without explicit in-doc base and no base option given" do
    assert_raise RuntimeError, fn ->
      Turtle.Decoder.decode!(
        "<#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .")
    end
  end

  # The base is supplied through the `:base` decoder option only.
  test "without explicit in-doc base, but base option given" do
    assert Turtle.Decoder.decode!("""
    <#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .
    """, base: "http://example.org/") == Graph.new({EX.Aaron, RDF.type, EX.Person})
  end

  # Turtle-style directive, with and without a trailing fragment marker on the base.
  test "with @base given" do
    assert Turtle.Decoder.decode!("""
    @base <http://example.org/> .
    <#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .
    """) == Graph.new({EX.Aaron, RDF.type, EX.Person})

    assert Turtle.Decoder.decode!("""
    @base <http://example.org/#> .
    <#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .
    """) == Graph.new({EX.Aaron, RDF.type, EX.Person})
  end

  # SPARQL-style directive, in upper and lower case.
  test "with BASE given" do
    assert Turtle.Decoder.decode!("""
    BASE <http://example.org/>
    <#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .
    """) == Graph.new({EX.Aaron, RDF.type, EX.Person})

    assert Turtle.Decoder.decode!("""
    base <http://example.org/#>
    <#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .
    """) == Graph.new({EX.Aaron, RDF.type, EX.Person})
  end

  # A relative base is rejected both as an in-document directive and as an option.
  test "when a given base is itself relative" do
    assert_raise RDF.InvalidURIError, fn ->
      Turtle.Decoder.decode!("""
      @base <foo> .
      <#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .
      """)
    end

    assert_raise RDF.InvalidURIError, fn ->
      Turtle.Decoder.decode!(
        "<#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .",
        base: "foo")
    end
  end
end
end end