From cbc2b0f122dfe98ea9c7bcb811473b1df5ca4db6 Mon Sep 17 00:00:00 2001
From: Marcel Otto
Date: Fri, 7 Jul 2017 18:00:09 +0200
Subject: [PATCH] Add base URI handling to Turtle decoder

Relative IRIs in a Turtle document are no longer rejected by the parser.
The parser now hands them to the decoder as `{:relative_uri, value}`, and
the decoder resolves them against the current base URI with the new
`RDF.URI.Helper.absolute_iri/2`.

The base URI is taken from the `:base` option of `RDF.Turtle.Decoder.decode/2`
(options may be given as a keyword list or a map) and is replaced by any
`@base`/`BASE` directive in the document. Decoding a relative IRI while no
base URI is known raises a RuntimeError.
---
 lib/rdf/serialization/parse_helper.ex    |  9 ++++
 lib/rdf/serializations/turtle_decoder.ex | 26 +++++++++---
 lib/rdf/uri_helper.ex                    | 29 +++++++++++++
 src/turtle_parser.yrl                    |  6 +--
 test/unit/turtle_decoder_test.exs        | 54 ++++++++++++++++++++++++
 5 files changed, 113 insertions(+), 11 deletions(-)
 create mode 100644 lib/rdf/uri_helper.ex

diff --git a/lib/rdf/serialization/parse_helper.ex b/lib/rdf/serialization/parse_helper.ex
index 84a62d4..aa75073 100644
--- a/lib/rdf/serialization/parse_helper.ex
+++ b/lib/rdf/serialization/parse_helper.ex
@@ -6,6 +6,7 @@ defmodule RDF.Serialization.ParseHelper do
   @rdf_type RDF.uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
   def rdf_type, do: @rdf_type
 
+
   def to_uri_string({:iriref, line, value}), do: value
 
   def to_uri({:iriref, line, value}) do
@@ -15,6 +16,14 @@ defmodule RDF.Serialization.ParseHelper do
     end
   end
 
+  def to_absolute_or_relative_uri({:iriref, _line, value}) do
+    case URI.parse(value) do
+      uri = %URI{scheme: scheme} when not is_nil(scheme) -> uri
+      _ -> {:relative_uri, value}
+    end
+  end
+
+
   def to_bnode({:blank_node_label, _line, value}), do: RDF.bnode(value)
   def to_bnode({:anon, _line}), do: RDF.bnode
   # TODO:
diff --git a/lib/rdf/serializations/turtle_decoder.ex b/lib/rdf/serializations/turtle_decoder.ex
index ef63931..cb3f9a9 100644
--- a/lib/rdf/serializations/turtle_decoder.ex
+++ b/lib/rdf/serializations/turtle_decoder.ex
@@ -20,10 +20,16 @@ defmodule RDF.Turtle.Decoder do
     end
   end
 
-  def decode(content, _opts \\ []) do
+  def decode(content, opts \\ %{})
+
+  def decode(content, opts) when is_list(opts),
+    do: decode(content, Map.new(opts))
+
+  def decode(content, opts) do
     with {:ok, tokens, _} <- tokenize(content),
-         {:ok, ast} <- parse(tokens) do
-      {:ok, build_graph(ast)}
+         {:ok, ast} <- parse(tokens),
+         base = Map.get(opts, :base) do
+      {:ok, build_graph(ast, base && RDF.uri(base))}
     else
       {:error, {error_line, :turtle_lexer, error_descriptor}, _error_line_again} ->
         {:error, "Turtle scanner error on line #{error_line}: #{inspect error_descriptor}"}
@@ -37,9 +43,9 @@ defmodule RDF.Turtle.Decoder do
   defp parse([]), do: {:ok, []}
   defp parse(tokens), do: tokens |> :turtle_parser.parse
 
-  defp build_graph(ast) do
+  defp build_graph(ast, base) do
     {graph, _} =
-      Enum.reduce ast, {RDF.Graph.new, %State{}}, fn
+      Enum.reduce ast, {RDF.Graph.new, %State{base_uri: base}}, fn
         {:triples, triples_ast}, {graph, state} ->
           with {statements, state} = triples(triples_ast, state) do
             {RDF.Graph.add(graph, statements), state}
@@ -57,7 +63,7 @@ defmodule RDF.Turtle.Decoder do
   end
 
   defp directive({:base, uri}, state) do
-    %State{state | base_uri: uri}
+    %State{state | base_uri: RDF.uri(uri)}
   end
 
 
@@ -85,6 +91,14 @@ defmodule RDF.Turtle.Decoder do
     {RDF.uri(State.ns(state, prefix) <> name), statements, state}
   end
 
+  defp resolve_node({:relative_uri, relative_uri}, _, %State{base_uri: nil}) do
+    raise "Could not resolve relative IRI '#{relative_uri}', no base uri provided"
+  end
+
+  defp resolve_node({:relative_uri, relative_uri}, statements, state) do
+    {RDF.URI.Helper.absolute_iri(relative_uri, state.base_uri), statements, state}
+  end
+
   defp resolve_node({:anon}, statements, state) do
     with {node, state} = State.next_bnode(state) do
       {node, statements, state}
diff --git a/lib/rdf/uri_helper.ex b/lib/rdf/uri_helper.ex
new file mode 100644
index 0000000..0e2ff04
--- /dev/null
+++ b/lib/rdf/uri_helper.ex
@@ -0,0 +1,29 @@
+defmodule RDF.URI.Helper do
+  @moduledoc """
+  Some helper functions for working with URIs.
+
+  These functions should be part of a dedicated RDF.IRI implementation.
+  """
+
+
+  @doc """
+  Resolves a relative IRI against a base IRI.
+
+  The base is used as specified in [section 5.1 Establishing a Base URI of RFC3986](http://tools.ietf.org/html/rfc3986#section-5.1).
+  Only the basic algorithm in [section 5.2 of RFC3986](http://tools.ietf.org/html/rfc3986#section-5.2)
+  is used; neither Syntax-Based Normalization nor Scheme-Based Normalization is performed.
+
+  Characters additionally allowed in IRI references are treated in the same way that unreserved
+  characters are treated in URI references, per [section 6.5 of RFC3987](http://tools.ietf.org/html/rfc3987#section-6.5).
+  """
+  def absolute_iri(value, base_iri) do
+    case URI.parse(value) do
+      # already absolute (has a scheme): returned as parsed URI, the base is ignored
+      uri = %URI{scheme: scheme} when not is_nil(scheme) -> uri
+      # relative: nil when there is no base IRI, otherwise merged with the base
+      _ when is_nil(base_iri) -> nil
+      _ -> URI.merge(base_iri, value)
+    end
+  end
+
+end
diff --git a/src/turtle_parser.yrl b/src/turtle_parser.yrl
index 270c71b..1c3e9d0 100644
--- a/src/turtle_parser.yrl
+++ b/src/turtle_parser.yrl
@@ -84,11 +84,7 @@ blankNode -> anon : {anon} .
 Erlang code.
 
 to_uri_string(IRIREF) -> 'Elixir.RDF.Serialization.ParseHelper':to_uri_string(IRIREF) .
-to_uri(IRIREF) ->
-  case 'Elixir.RDF.Serialization.ParseHelper':to_uri(IRIREF) of
-    {ok, URI} -> URI;
-    {error, ErrorLine, Message} -> return_error(ErrorLine, Message)
-  end.
+to_uri(IRIREF) -> 'Elixir.RDF.Serialization.ParseHelper':to_absolute_or_relative_uri(IRIREF) .
 to_bnode(BLANK_NODE) -> 'Elixir.RDF.Serialization.ParseHelper':to_bnode(BLANK_NODE).
 to_literal(STRING_LITERAL_QUOTE) -> 'Elixir.RDF.Serialization.ParseHelper':to_literal(STRING_LITERAL_QUOTE).
 to_literal(STRING_LITERAL_QUOTE, Type) -> 'Elixir.RDF.Serialization.ParseHelper':to_literal(STRING_LITERAL_QUOTE, Type).
diff --git a/test/unit/turtle_decoder_test.exs b/test/unit/turtle_decoder_test.exs
index 64f8192..0233270 100644
--- a/test/unit/turtle_decoder_test.exs
+++ b/test/unit/turtle_decoder_test.exs
@@ -312,4 +312,58 @@ defmodule RDF.Turtle.DecoderTest do
     end
   end
 
+
+  describe "relative IRIs" do
+    test "without explicit in-doc base and no base option given" do
+      assert_raise RuntimeError, fn ->
+        Turtle.Decoder.decode!(
+          "<#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .")
+      end
+    end
+
+    test "without explicit in-doc base, but base option given" do
+      assert Turtle.Decoder.decode!("""
+        <#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .
+        """, base: "http://example.org/") == Graph.new({EX.Aaron, RDF.type, EX.Person})
+    end
+
+    test "with @base given" do
+      assert Turtle.Decoder.decode!("""
+        @base <http://example.org/> .
+        <#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .
+        """) == Graph.new({EX.Aaron, RDF.type, EX.Person})
+
+      assert Turtle.Decoder.decode!("""
+        @base <http://example.org/#> .
+        <#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .
+        """) == Graph.new({EX.Aaron, RDF.type, EX.Person})
+    end
+
+    test "with BASE given" do
+      assert Turtle.Decoder.decode!("""
+        BASE <http://example.org/>
+        <#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .
+        """) == Graph.new({EX.Aaron, RDF.type, EX.Person})
+
+      assert Turtle.Decoder.decode!("""
+        base <http://example.org/>
+        <#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .
+        """) == Graph.new({EX.Aaron, RDF.type, EX.Person})
+    end
+
+    test "when a given base is itself relative" do
+      assert_raise RDF.InvalidURIError, fn ->
+        Turtle.Decoder.decode!("""
+          @base <foo> .
+          <#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .
+          """)
+      end
+      assert_raise RDF.InvalidURIError, fn ->
+        Turtle.Decoder.decode!(
+          "<#Aaron> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <#Person> .",
+          base: "foo")
+      end
+    end
+  end
+
 end