Add base URI handling to Turtle decoder
This commit is contained in:
parent
a0b18dcfa3
commit
cbc2b0f122
5 changed files with 113 additions and 11 deletions
|
@ -6,6 +6,7 @@ defmodule RDF.Serialization.ParseHelper do
|
||||||
@rdf_type RDF.uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
|
@rdf_type RDF.uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
|
||||||
def rdf_type, do: @rdf_type
|
def rdf_type, do: @rdf_type
|
||||||
|
|
||||||
|
|
||||||
def to_uri_string({:iriref, line, value}), do: value
|
def to_uri_string({:iriref, line, value}), do: value
|
||||||
|
|
||||||
def to_uri({:iriref, line, value}) do
|
def to_uri({:iriref, line, value}) do
|
||||||
|
@ -15,6 +16,14 @@ defmodule RDF.Serialization.ParseHelper do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Converts an `:iriref` token into either an absolute URI or a tagged
# relative reference.
#
# The token value is parsed with `URI.parse/1`. When the parsed result has a
# scheme, the `%URI{}` struct is returned as-is. Otherwise the original,
# unparsed value is returned as `{:relative_uri, value}` so that the caller can
# resolve it later.
#
# The line number carried by the token is not needed here, hence `_line`.
def to_absolute_or_relative_uri({:iriref, _line, value}) do
  case URI.parse(value) do
    %URI{scheme: scheme} = uri when not is_nil(scheme) -> uri
    _ -> {:relative_uri, value}
  end
end
|
||||||
|
|
||||||
|
|
||||||
def to_bnode({:blank_node_label, _line, value}), do: RDF.bnode(value)
|
def to_bnode({:blank_node_label, _line, value}), do: RDF.bnode(value)
|
||||||
def to_bnode({:anon, _line}), do: RDF.bnode # TODO:
|
def to_bnode({:anon, _line}), do: RDF.bnode # TODO:
|
||||||
|
|
||||||
|
|
|
@ -20,10 +20,16 @@ defmodule RDF.Turtle.Decoder do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def decode(content, _opts \\ []) do
|
def decode(content, opts \\ %{})
|
||||||
|
|
||||||
|
def decode(content, opts) when is_list(opts),
|
||||||
|
do: decode(content, Map.new(opts))
|
||||||
|
|
||||||
|
def decode(content, opts) do
|
||||||
with {:ok, tokens, _} <- tokenize(content),
|
with {:ok, tokens, _} <- tokenize(content),
|
||||||
{:ok, ast} <- parse(tokens) do
|
{:ok, ast} <- parse(tokens),
|
||||||
{:ok, build_graph(ast)}
|
base = Map.get(opts, :base) do
|
||||||
|
{:ok, build_graph(ast, base && RDF.uri(base))}
|
||||||
else
|
else
|
||||||
{:error, {error_line, :turtle_lexer, error_descriptor}, _error_line_again} ->
|
{:error, {error_line, :turtle_lexer, error_descriptor}, _error_line_again} ->
|
||||||
{:error, "Turtle scanner error on line #{error_line}: #{inspect error_descriptor}"}
|
{:error, "Turtle scanner error on line #{error_line}: #{inspect error_descriptor}"}
|
||||||
|
@ -37,9 +43,9 @@ defmodule RDF.Turtle.Decoder do
|
||||||
defp parse([]), do: {:ok, []}
|
defp parse([]), do: {:ok, []}
|
||||||
defp parse(tokens), do: tokens |> :turtle_parser.parse
|
defp parse(tokens), do: tokens |> :turtle_parser.parse
|
||||||
|
|
||||||
defp build_graph(ast) do
|
defp build_graph(ast, base) do
|
||||||
{graph, _} =
|
{graph, _} =
|
||||||
Enum.reduce ast, {RDF.Graph.new, %State{}}, fn
|
Enum.reduce ast, {RDF.Graph.new, %State{base_uri: base}}, fn
|
||||||
{:triples, triples_ast}, {graph, state} ->
|
{:triples, triples_ast}, {graph, state} ->
|
||||||
with {statements, state} = triples(triples_ast, state) do
|
with {statements, state} = triples(triples_ast, state) do
|
||||||
{RDF.Graph.add(graph, statements), state}
|
{RDF.Graph.add(graph, statements), state}
|
||||||
|
@ -57,7 +63,7 @@ defmodule RDF.Turtle.Decoder do
|
||||||
end
|
end
|
||||||
|
|
||||||
defp directive({:base, uri}, state) do
|
defp directive({:base, uri}, state) do
|
||||||
%State{state | base_uri: uri}
|
%State{state | base_uri: RDF.uri(uri)}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
@ -85,6 +91,14 @@ defmodule RDF.Turtle.Decoder do
|
||||||
{RDF.uri(State.ns(state, prefix) <> name), statements, state}
|
{RDF.uri(State.ns(state, prefix) <> name), statements, state}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# A relative IRI cannot be resolved when the decoder state has no base URI,
# so this clause raises a RuntimeError naming the offending reference.
defp resolve_node({:relative_uri, relative_uri}, _, %State{base_uri: nil}) do
  raise "Could not resolve relative IRI '#{relative_uri}', no base uri provided"
end

# With a base URI present in the state, the relative reference is resolved
# against it via RDF.URI.Helper.absolute_iri/2. The accumulated statements
# and the state are passed through unchanged.
defp resolve_node({:relative_uri, relative_uri}, statements, state) do
  {RDF.URI.Helper.absolute_iri(relative_uri, state.base_uri), statements, state}
end
|
||||||
|
|
||||||
defp resolve_node({:anon}, statements, state) do
|
defp resolve_node({:anon}, statements, state) do
|
||||||
with {node, state} = State.next_bnode(state) do
|
with {node, state} = State.next_bnode(state) do
|
||||||
{node, statements, state}
|
{node, statements, state}
|
||||||
|
|
29
lib/rdf/uri_helper.ex
Normal file
29
lib/rdf/uri_helper.ex
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
defmodule RDF.URI.Helper do
  @moduledoc """
  Helper functions for working with URIs.

  These functions should eventually become part of a dedicated RDF.IRI implementation.
  """

  @doc """
  Resolves a possibly relative IRI against a base IRI.

  The given `value` is parsed with `URI.parse/1`:

  - if the parsed value has a scheme, it is already absolute and the parsed
    `URI` struct is returned;
  - if it has no scheme and `base_iri` is `nil`, `nil` is returned;
  - otherwise the result of `URI.merge(base_iri, value)` is returned.

  Establishing the base follows
  [section 5.1 of RFC3986](http://tools.ietf.org/html/rfc3986#section-5.1).
  Only the basic algorithm in
  [section 5.2 of RFC3986](http://tools.ietf.org/html/rfc3986#section-5.2)
  is used; neither Syntax-Based Normalization nor Scheme-Based Normalization
  is performed.

  Characters additionally allowed in IRI references are treated in the same
  way that unreserved characters are treated in URI references, per
  [section 6.5 of RFC3987](http://tools.ietf.org/html/rfc3987#section-6.5).
  """
  def absolute_iri(value, base_iri) do
    parsed = URI.parse(value)

    cond do
      # already absolute: a scheme is present
      not is_nil(parsed.scheme) -> parsed
      # relative, but nothing to resolve against
      is_nil(base_iri) -> nil
      # relative: resolve against the base
      true -> URI.merge(base_iri, value)
    end
  end
end
|
|
@ -84,11 +84,7 @@ blankNode -> anon : {anon} .
|
||||||
Erlang code.
|
Erlang code.
|
||||||
|
|
||||||
to_uri_string(IRIREF) -> 'Elixir.RDF.Serialization.ParseHelper':to_uri_string(IRIREF) .
|
to_uri_string(IRIREF) -> 'Elixir.RDF.Serialization.ParseHelper':to_uri_string(IRIREF) .
|
||||||
to_uri(IRIREF) ->
|
to_uri(IRIREF) -> 'Elixir.RDF.Serialization.ParseHelper':to_absolute_or_relative_uri(IRIREF) .
|
||||||
case 'Elixir.RDF.Serialization.ParseHelper':to_uri(IRIREF) of
|
|
||||||
{ok, URI} -> URI;
|
|
||||||
{error, ErrorLine, Message} -> return_error(ErrorLine, Message)
|
|
||||||
end.
|
|
||||||
to_bnode(BLANK_NODE) -> 'Elixir.RDF.Serialization.ParseHelper':to_bnode(BLANK_NODE).
|
to_bnode(BLANK_NODE) -> 'Elixir.RDF.Serialization.ParseHelper':to_bnode(BLANK_NODE).
|
||||||
to_literal(STRING_LITERAL_QUOTE) -> 'Elixir.RDF.Serialization.ParseHelper':to_literal(STRING_LITERAL_QUOTE).
|
to_literal(STRING_LITERAL_QUOTE) -> 'Elixir.RDF.Serialization.ParseHelper':to_literal(STRING_LITERAL_QUOTE).
|
||||||
to_literal(STRING_LITERAL_QUOTE, Type) -> 'Elixir.RDF.Serialization.ParseHelper':to_literal(STRING_LITERAL_QUOTE, Type).
|
to_literal(STRING_LITERAL_QUOTE, Type) -> 'Elixir.RDF.Serialization.ParseHelper':to_literal(STRING_LITERAL_QUOTE, Type).
|
||||||
|
|
|
@ -312,4 +312,58 @@ defmodule RDF.Turtle.DecoderTest do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
# Covers resolution of relative IRIs in Turtle documents: against the `base`
# decoder option, against an in-document `@base`/`BASE` directive, and the
# failure cases where no base or only a relative base is available.
describe "relative IRIs" do
  test "without explicit in-doc base and no base option given" do
    assert_raise RuntimeError, fn ->
      Turtle.Decoder.decode!(
        "<#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .")
    end
  end

  test "without explicit in-doc base, but base option given" do
    assert Turtle.Decoder.decode!("""
      <#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .
      """, base: "http://example.org/") == Graph.new({EX.Aaron, RDF.type, EX.Person})
  end

  test "with @base given" do
    assert Turtle.Decoder.decode!("""
      @base <http://example.org/> .
      <#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .
      """) == Graph.new({EX.Aaron, RDF.type, EX.Person})

    assert Turtle.Decoder.decode!("""
      @base <http://example.org/#> .
      <#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .
      """) == Graph.new({EX.Aaron, RDF.type, EX.Person})
  end

  test "with BASE given" do
    assert Turtle.Decoder.decode!("""
      BASE <http://example.org/>
      <#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .
      """) == Graph.new({EX.Aaron, RDF.type, EX.Person})

    assert Turtle.Decoder.decode!("""
      base <http://example.org/#>
      <#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .
      """) == Graph.new({EX.Aaron, RDF.type, EX.Person})
  end

  test "when a given base is itself relative" do
    assert_raise RDF.InvalidURIError, fn ->
      Turtle.Decoder.decode!("""
        @base <foo> .
        <#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .
        """)
    end

    assert_raise RDF.InvalidURIError, fn ->
      Turtle.Decoder.decode!(
        "<#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .",
        base: "foo")
    end
  end
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue