Add Turtle decoder

This commit is contained in:
Marcel Otto 2017-07-05 23:27:55 +02:00
parent 63be10310a
commit 06dbddaf3f
9 changed files with 676 additions and 14 deletions

View file

@ -7,6 +7,10 @@ This project adheres to [Semantic Versioning](http://semver.org/) and
## Unreleased
### Added
- Turtle decoder
### Changed
- Drop support for Elixir versions < 1.4

View file

@ -9,11 +9,10 @@ An implementation of the [RDF](https://www.w3.org/TR/rdf11-primer/) data model i
## Features
- aims to be fully compatible with the RDF 1.1 specification; any incompatibility is considered a bug
- pure Elixir implementation
- fully compatible with the RDF 1.1 specification
- no dependencies
- in-memory data structures for RDF descriptions, RDF graphs and RDF datasets
- support for RDF vocabularies via Elixir modules for safe, i.e. compile-time checked and concise usage of the URIs of vocabularies, resembling QNames
- support for RDF vocabularies via Elixir modules for safe, i.e. compile-time checked and concise usage of URIs
- XML schema datatypes for RDF literals (not yet all supported)
- sigils for the most common types of nodes, i.e. URIs, literals and blank nodes
- a description DSL resembling Turtle in Elixir
@ -118,7 +117,8 @@ iex> uri(RDF.Property)
This way of expressing URIs has the additional benefit, that the existence of the referenced URI is checked at compile time, i.e. whenever a term is used that is not part of the resp. vocabulary an error is raised by the Elixir compiler (unless the vocabulary namespace is non-strict; see below).
For terms not adhering to the capitalization rules (lowercase properties, capitalized non-properties) or containing characters not allowed within atoms, these namespace define aliases accordingly. If unsure, you can have a look at the documentation or the vocabulary namespace definition.
For terms not adhering to the capitalization rules (lowercase properties, capitalized non-properties) or containing characters not allowed within atoms, the predefined namespaces in `RDF.NS` and `RDF.Vocab` define aliases accordingly. If unsure, have a look at the documentation or their definitions.
#### Description DSL
@ -632,15 +632,14 @@ Currently only [JSON-LD] is available with the [JSON-LD.ex] package.
- [Google Group](https://groups.google.com/d/forum/rdfex)
## Development
## TODO
There's still much to do for a complete RDF ecosystem for Elixir, which means there are plenty of opportunities for you to contribute. Here are some suggestions:
- more serialization formats
- [Turtle]
- [RDFa]
- [N3]
- [RDF-XML]
- [N3]
- et al.
- missing XSD datatypes
- more sophisticated query capabilities and full SPARQL support (in the style of Ecto queries)

View file

@ -1,6 +1,13 @@
defmodule RDF.Serialization.ParseHelper do
@moduledoc false
alias RDF.Datatype.NS.XSD
@rdf_type RDF.uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
def rdf_type, do: @rdf_type
# Extracts the raw IRI string from an `iriref` token; the token's line
# number is not needed here, so it is matched with `_line` to avoid an
# unused-variable compiler warning.
def to_uri_string({:iriref, _line, value}), do: value
def to_uri({:iriref, line, value}) do
case URI.parse(value) do
%URI{scheme: nil} -> {:error, line, "#{value} is not a valid URI"}
@ -9,16 +16,33 @@ defmodule RDF.Serialization.ParseHelper do
end
def to_bnode({:blank_node_label, _line, value}), do: RDF.bnode(value)
def to_bnode({:anon, _line}), do: RDF.bnode # TODO:
def to_literal({:string_literal_quote, _line, value}),
do: RDF.literal(value)
def to_literal({:integer, _line, value}), do: RDF.literal(value)
def to_literal({:decimal, _line, value}), do: RDF.literal(value)
def to_literal({:double, _line, value}), do: RDF.literal(value)
def to_literal({:boolean, _line, value}), do: RDF.literal(value)
def to_literal({:string_literal_quote, _line, value}, type),
do: RDF.literal(value, [type])
def to_langtag({:langtag, _line, value}), do: value
def integer(value), do: RDF.Integer.new(List.to_string(value))
def decimal(value), do: RDF.Literal.new(List.to_string(value), datatype: XSD.decimal)
def double(value), do: RDF.Double.new(List.to_string(value))
def boolean('true'), do: true
def boolean('false'), do: false
def bnode_str('_:' ++ value), do: List.to_string(value)
def langtag_str('@' ++ value), do: List.to_string(value)
def quoted_content_str(value), do: value |> List.to_string |> String.slice(1..-2)
# Extracts the language tag from a token.
#
# The lexer tokenizes the literal strings "@prefix" and "@base" as directive
# tokens even when they appear as language tags (e.g. `"foo"@prefix`), so the
# parser routes them back here. These clauses must match on ANY line number:
# the previous code matched the literal `1`, causing a FunctionClauseError
# for such langtags on any line other than the first.
def to_langtag({:langtag, _line, value}), do: value
def to_langtag({:"@prefix", _line}), do: "prefix"
def to_langtag({:"@base", _line}), do: "base"
def bnode_str('_:' ++ value), do: List.to_string(value)
def langtag_str('@' ++ value), do: List.to_string(value)
def quoted_content_str(value), do: value |> List.to_string |> String.slice(1..-2)
def long_quoted_content_str(value), do: value |> List.to_string |> String.slice(3..-4)
def prefix_ns(value), do: value |> List.to_string |> String.slice(0..-2)
def prefix_ln(value), do: value |> List.to_string |> String.split(":", parts: 2) |> List.to_tuple
end

View file

@ -0,0 +1,17 @@
defmodule RDF.Turtle do
  @moduledoc """
  `RDF.Turtle` provides support for reading and writing the Turtle
  serialization format.

  See <https://www.w3.org/TR/turtle/>
  """

  use RDF.Serialization

  import RDF.Sigils

  # Serialization format metadata consumed by the RDF.Serialization
  # machinery: the format's IRI, its default file extension and MIME type.
  @id ~I<http://www.w3.org/ns/formats/Turtle>
  @extension "ttl"
  @content_type "text/turtle"
end

View file

@ -0,0 +1,129 @@
defmodule RDF.Turtle.Decoder do
  @moduledoc false

  # Parser state threaded through graph construction:
  # - base_uri: IRI from the last @base/BASE directive (stored here, but not
  #   read by resolve_node — TODO confirm relative IRI resolution happens
  #   elsewhere or is still pending)
  # - namespaces: prefix string => IRI string, filled by @prefix/PREFIX
  # - bnode_counter: counter used to generate fresh blank node labels
  defmodule State do
    defstruct base_uri: nil, namespaces: %{}, bnode_counter: 0

    # Registers the prefix `ns` as referring to `iri`.
    def add_namespace(%State{namespaces: namespaces} = state, ns, iri) do
      %State{state | namespaces: Map.put(namespaces, ns, iri)}
    end

    # Returns the IRI bound to `prefix`, or nil when the prefix is unknown.
    def ns(%State{namespaces: namespaces}, prefix) do
      namespaces[prefix]
    end

    # Returns {fresh_blank_node, updated_state}; labels are "b0", "b1", ...
    def next_bnode(%State{bnode_counter: bnode_counter} = state) do
      {RDF.bnode("b#{bnode_counter}"),
       %State{state | bnode_counter: bnode_counter + 1}}
    end
  end

  # Decodes a Turtle document string into {:ok, RDF.Graph} or
  # {:error, message}. Pipeline: leex tokenizer -> yecc parser -> AST walk.
  def decode(content, _opts \\ []) do
    with {:ok, tokens, _} <- tokenize(content),
         {:ok, ast} <- parse(tokens) do
      {:ok, build_graph(ast)}
    else
      # The generated lexer reports errors as a 3-tuple, the parser as a
      # 2-tuple; translate both into readable messages with the line number.
      {:error, {error_line, :turtle_lexer, error_descriptor}, _error_line_again} ->
        {:error, "Turtle scanner error on line #{error_line}: #{inspect error_descriptor}"}
      {:error, {error_line, :turtle_parser, error_descriptor}} ->
        {:error, "Turtle parser error on line #{error_line}: #{inspect error_descriptor}"}
    end
  end

  defp tokenize(content), do: content |> to_charlist |> :turtle_lexer.string

  # An empty document produces no tokens; short-circuit to an empty AST
  # instead of handing an empty token list to the generated parser.
  defp parse([]), do: {:ok, []}
  defp parse(tokens), do: tokens |> :turtle_parser.parse

  # Folds the parsed statements into an RDF.Graph, threading State so that
  # directives affect subsequent triples and blank node labels stay unique.
  defp build_graph(ast) do
    {graph, _} =
      Enum.reduce ast, {RDF.Graph.new, %State{}}, fn
        {:triples, triples_ast}, {graph, state} ->
          with {statements, state} = triples(triples_ast, state) do
            {RDF.Graph.add(graph, statements), state}
          end
        {:directive, directive_ast}, {graph, state} ->
          {graph, directive(directive_ast, state)}
      end
    graph
  end

  defp directive({:prefix, {:prefix_ns, _, ns}, iri}, state) do
    State.add_namespace(state, ns, iri)
  end

  defp directive({:base, uri}, state) do
    %State{state | base_uri: uri}
  end

  # A bare blank node property list used as a complete statement: only the
  # statements generated while resolving it are kept (its node is discarded).
  defp triples({:blankNodePropertyList, _} = ast, state) do
    with {_, statements, state} = resolve_node(ast, [], state) do
      {statements, state}
    end
  end

  # Expands {subject, [{predicate, objects}]} into a flat list of statements,
  # resolving prefixed names, blank nodes and collections along the way.
  defp triples({subject, predications}, state) do
    with {subject, statements, state} = resolve_node(subject, [], state) do
      Enum.reduce predications, {statements, state}, fn {predicate, objects}, {statements, state} ->
        with {predicate, statements, state} = resolve_node(predicate, statements, state) do
          Enum.reduce objects, {statements, state}, fn object, {statements, state} ->
            with {object, statements, state} = resolve_node(object, statements, state) do
              {[{subject, predicate, object} | statements], state}
            end
          end
        end
      end
    end
  end

  # resolve_node/3 maps an AST node to an RDF term, possibly emitting
  # auxiliary statements (for property lists and collections).
  # Returns {term, statements, state}.

  # Prefixed name: expand against the namespace registered via @prefix/PREFIX.
  defp resolve_node({:prefix_ln, _, {prefix, name}}, statements, state) do
    {RDF.uri(State.ns(state, prefix) <> name), statements, state}
  end

  # [] — anonymous blank node.
  defp resolve_node({:anon}, statements, state) do
    with {node, state} = State.next_bnode(state) do
      {node, statements, state}
    end
  end

  # [ p o; ... ] — fresh blank node acting as subject of the inner predications.
  defp resolve_node({:blankNodePropertyList, property_list}, statements, state) do
    with {subject, state} = State.next_bnode(state),
         {new_statements, state} = triples({subject, property_list}, state) do
      {subject, statements ++ new_statements, state}
    end
  end

  # () — the empty collection is rdf:nil.
  defp resolve_node({:collection, []}, statements, state) do
    {RDF.nil, statements, state}
  end

  # ( e1 e2 ... ) — build an RDF list: one blank node per element, chained
  # with rdf:first/rdf:rest and terminated by rdf:nil.
  defp resolve_node({:collection, elements}, statements, state) do
    with {first_list_node, state} = State.next_bnode(state),
         [first_element | rest_elements] = elements,
         {first_element_node, statements, state} =
           resolve_node(first_element, statements, state),
         first_statement = [{first_list_node, RDF.first, first_element_node}] do
      {last_list_node, statements, state} =
        Enum.reduce rest_elements, {first_list_node, statements ++ first_statement, state},
          fn element, {list_node, statements, state} ->
            with {element_node, statements, state} =
                   resolve_node(element, statements, state),
                 {next_list_node, state} = State.next_bnode(state) do
              {next_list_node, statements ++ [
                {list_node, RDF.rest, next_list_node},
                {next_list_node, RDF.first, element_node},
              ], state}
            end
          end
      {first_list_node, statements ++ [{last_list_node, RDF.rest, RDF.nil}], state}
    end
  end

  # IRIs, literals and labeled blank nodes arrive already resolved from the
  # parser; pass them through unchanged.
  defp resolve_node(node, statements, state), do: {node, statements, state}
end

View file

@ -6,9 +6,9 @@ EOL = [\n\r]+
HEX = [0-9]|[A-F]|[a-f]
UCHAR = (\\u({HEX})({HEX})({HEX})({HEX}))|(\\U({HEX})({HEX})({HEX})({HEX})({HEX})({HEX})({HEX})({HEX}))
ECHAR = \\[tbnrf"'\\]
PN_CHARS_BASE = [A-Z]|[a-z]|[\x{00C0}-\x{00D6}]|[\x{00D8}-\x{00F6}]|[\x{00F8}-\x{02FF}]|[\x{0370}-\x{037D}]|[\x{037F}-\x{1FFF}]|[\x{200C}-\x{200D}]|[\x{2070}-\x{218F}]|[\x{2C00}-\x{2FEF}]|[\x{3001}-\x{D7FF}]|[\x{F900}-\x{FDCF}]|[\x{FDF0}-\x{FFFD}]|[\x{10000}-\x{EFFFF}]
PN_CHARS_U = {PN_CHARS_BASE}|_|:
PN_CHARS = {PN_CHARS_U}|-|[0-9]|\x{00B7}|[\x{0300}-\x{036F}]|[\x{203F}-\x{2040}]
PN_CHARS_BASE = ([A-Z]|[a-z]|[\x{00C0}-\x{00D6}]|[\x{00D8}-\x{00F6}]|[\x{00F8}-\x{02FF}]|[\x{0370}-\x{037D}]|[\x{037F}-\x{1FFF}]|[\x{200C}-\x{200D}]|[\x{2070}-\x{218F}]|[\x{2C00}-\x{2FEF}]|[\x{3001}-\x{D7FF}]|[\x{F900}-\x{FDCF}]|[\x{FDF0}-\x{FFFD}]|[\x{10000}-\x{EFFFF}])
PN_CHARS_U = ({PN_CHARS_BASE}|_|:)
PN_CHARS = ({PN_CHARS_U}|-|[0-9]|\x{00B7}|[\x{0300}-\x{036F}]|[\x{203F}-\x{2040}])
IRIREF = <([^\x00-\x20<>"{}|^`\\]|{UCHAR})*>
STRING_LITERAL_QUOTE = "([^\x22\x5C\x0A\x0D]|{ECHAR}|{UCHAR})*"
BLANK_NODE_LABEL = _:({PN_CHARS_U}|[0-9])(({PN_CHARS}|\.)*({PN_CHARS}))?

90
src/turtle_lexer.xrl Normal file
View file

@ -0,0 +1,90 @@
%% Tokenizer (leex) for the terminals of the W3C Turtle grammar.
%% Token helper functions delegate to Elixir.RDF.Serialization.ParseHelper.
%% \00=NULL
%% \01-\x1F=control codes
%% \x20=space

Definitions.

COMMENT = #[^\n\r]*
WS = [\s\t\n\r]
ANON = \[{WS}*\]
HEX = [0-9]|[A-F]|[a-f]
UCHAR = (\\u({HEX})({HEX})({HEX})({HEX}))|(\\U({HEX})({HEX})({HEX})({HEX})({HEX})({HEX})({HEX})({HEX}))
ECHAR = \\[tbnrf"'\\]
PERCENT = %{HEX}{HEX}
%% Character classes for prefixed names (PN_* productions of the Turtle spec).
PN_CHARS_BASE = ([A-Z]|[a-z]|[\x{00C0}-\x{00D6}]|[\x{00D8}-\x{00F6}]|[\x{00F8}-\x{02FF}]|[\x{0370}-\x{037D}]|[\x{037F}-\x{1FFF}]|[\x{200C}-\x{200D}]|[\x{2070}-\x{218F}]|[\x{2C00}-\x{2FEF}]|[\x{3001}-\x{D7FF}]|[\x{F900}-\x{FDCF}]|[\x{FDF0}-\x{FFFD}]|[\x{10000}-\x{EFFFF}])
PN_CHARS_U = ({PN_CHARS_BASE}|_)
PN_CHARS = ({PN_CHARS_U}|-|[0-9]|\x{00B7}|[\x{0300}-\x{036F}]|[\x{203F}-\x{2040}])
PN_PREFIX = ({PN_CHARS_BASE}(({PN_CHARS}|\.)*{PN_CHARS})?)
%% NOTE(review): PN_LOCAL references {PLX} before PLX/PN_LOCAL_ESC are
%% defined below — confirm leex resolves forward macro references.
PN_LOCAL = ({PN_CHARS_U}|:|[0-9]|{PLX})(({PN_CHARS}|\.|:|{PLX})*({PN_CHARS}|:|{PLX}))?
PN_LOCAL_ESC = \\(_|\~|\.|\-|\!|\$|\&|\'|\(|\)|\*|\+|\,|\;|\=|\/|\?|\#|\@|\%)
PLX = {PERCENT}|{PN_LOCAL_ESC}
PNAME_NS = {PN_PREFIX}?:
PNAME_LN = {PNAME_NS}{PN_LOCAL}
EXPONENT = ([eE][+-]?[0-9]+)
BOOLEAN = true|false
INTEGER = [+-]?[0-9]+
DECIMAL = [+-]?[0-9]*\.[0-9]+
DOUBLE = [+-]?([0-9]+\.[0-9]*{EXPONENT}|\.[0-9]+{EXPONENT}|[0-9]+{EXPONENT})
IRIREF = <([^\x00-\x20<>"{}|^`\\]|{UCHAR})*>
STRING_LITERAL_QUOTE = "([^"\\\n\r]|{ECHAR}|{UCHAR})*"
STRING_LITERAL_SINGLE_QUOTE = '([^'\\\n\r]|{ECHAR}|{UCHAR})*'
STRING_LITERAL_LONG_SINGLE_QUOTE = '''(('|'')?([^'\\]|{ECHAR}|{UCHAR}))*'''
STRING_LITERAL_LONG_QUOTE = """(("|"")?([^"\\]|{ECHAR}|{UCHAR}))*"""
BLANK_NODE_LABEL = _:({PN_CHARS_U}|[0-9])(({PN_CHARS}|\.)*({PN_CHARS}))?
LANGTAG = @[a-zA-Z]+(-[a-zA-Z0-9]+)*
%% Case-insensitive SPARQL-style directive keywords.
BASE = [Bb][Aa][Ss][Ee]
PREFIX = [Pp][Rr][Ee][Ff][Ii][Xx]

Rules.

%% leex picks the longest match; rule order only breaks ties, so e.g. a
%% long-quoted string still wins over STRING_LITERAL_QUOTE listed earlier.
@prefix : {token, {'@prefix', TokenLine}}.
@base : {token, {'@base', TokenLine}}.
{BASE} : {token, {'BASE', TokenLine}}.
{PREFIX} : {token, {'PREFIX', TokenLine}}.
{LANGTAG} : {token, {langtag, TokenLine, langtag_str(TokenChars)}}.
{IRIREF} : {token, {iriref, TokenLine, quoted_content_str(TokenChars)}}.
{DOUBLE} : {token, {double, TokenLine, double(TokenChars)}}.
{DECIMAL} : {token, {decimal, TokenLine, decimal(TokenChars)}}.
{INTEGER} : {token, {integer, TokenLine, integer(TokenChars)}}.
{BOOLEAN} : {token, {boolean, TokenLine, boolean(TokenChars)}}.
%% All four string forms collapse to the single string_literal_quote token.
{STRING_LITERAL_SINGLE_QUOTE} : {token, {string_literal_quote, TokenLine, quoted_content_str(TokenChars)}}.
{STRING_LITERAL_QUOTE} : {token, {string_literal_quote, TokenLine, quoted_content_str(TokenChars)}}.
{STRING_LITERAL_LONG_SINGLE_QUOTE} : {token, {string_literal_quote, TokenLine, long_quoted_content_str(TokenChars)}}.
{STRING_LITERAL_LONG_QUOTE} : {token, {string_literal_quote, TokenLine, long_quoted_content_str(TokenChars)}}.
{BLANK_NODE_LABEL} : {token, {blank_node_label, TokenLine, bnode_str(TokenChars)}}.
{ANON} : {token, {anon, TokenLine}}.
a : {token, {'a', TokenLine}}.
{PNAME_NS} : {token, {prefix_ns, TokenLine, prefix_ns(TokenChars)}}.
{PNAME_LN} : {token, {prefix_ln, TokenLine, prefix_ln(TokenChars)}}.
; : {token, {';', TokenLine}}.
, : {token, {',', TokenLine}}.
\. : {token, {'.', TokenLine}}.
\[ : {token, {'[', TokenLine}}.
\] : {token, {']', TokenLine}}.
\( : {token, {'(', TokenLine}}.
\) : {token, {')', TokenLine}}.
\^\^ : {token, {'^^', TokenLine}}.
{WS}+ : skip_token.
{COMMENT} : skip_token.

Erlang code.

%% Thin delegations into the Elixir parse helper module.
integer(TokenChars) -> 'Elixir.RDF.Serialization.ParseHelper':integer(TokenChars).
decimal(TokenChars) -> 'Elixir.RDF.Serialization.ParseHelper':decimal(TokenChars).
double(TokenChars) -> 'Elixir.RDF.Serialization.ParseHelper':double(TokenChars).
boolean(TokenChars) -> 'Elixir.RDF.Serialization.ParseHelper':boolean(TokenChars).
quoted_content_str(TokenChars) -> 'Elixir.RDF.Serialization.ParseHelper':quoted_content_str(TokenChars).
long_quoted_content_str(TokenChars) -> 'Elixir.RDF.Serialization.ParseHelper':long_quoted_content_str(TokenChars).
bnode_str(TokenChars) -> 'Elixir.RDF.Serialization.ParseHelper':bnode_str(TokenChars).
langtag_str(TokenChars) -> 'Elixir.RDF.Serialization.ParseHelper':langtag_str(TokenChars).
prefix_ns(TokenChars) -> 'Elixir.RDF.Serialization.ParseHelper':prefix_ns(TokenChars).
prefix_ln(TokenChars) -> 'Elixir.RDF.Serialization.ParseHelper':prefix_ln(TokenChars).

96
src/turtle_parser.yrl Normal file
View file

@ -0,0 +1,96 @@
%% Grammar (yecc) for Turtle as specified in
%% https://www.w3.org/TR/2014/REC-turtle-20140225/
%% (the previous comment mistakenly pointed at the N-Triples spec)

Nonterminals turtleDoc statement directive prefixID base sparqlPrefix sparqlBase
triples predicateObjectList objectList blankNodePropertyList
verb subject predicate object collection collection_elements
literal numericLiteral rdfLiteral booleanLiteral iri prefixedName blankNode.

Terminals prefix_ns prefix_ln iriref blank_node_label anon
string_literal_quote langtag integer decimal double boolean
'.' ';' ',' '[' ']' '(' ')' '^^' '@prefix' '@base' 'PREFIX' 'BASE' 'a' .

Rootsymbol turtleDoc.

%% A document is a non-empty sequence of statements (the decoder handles
%% the empty-token-list case before calling the parser).
turtleDoc -> statement : ['$1'] .
turtleDoc -> statement turtleDoc : ['$1' | '$2'] .
statement -> directive : {directive, '$1'} .
statement -> triples '.' : {triples, '$1'} .

%% Both Turtle (@prefix/@base, dot-terminated) and SPARQL-style
%% (PREFIX/BASE, no dot) directives are accepted.
directive -> prefixID : '$1' .
directive -> sparqlPrefix : '$1' .
directive -> base : '$1' .
directive -> sparqlBase : '$1' .
prefixID -> '@prefix' prefix_ns iriref '.' : {prefix, '$2', to_uri_string('$3')} .
sparqlPrefix -> 'PREFIX' prefix_ns iriref : {prefix, '$2', to_uri_string('$3')} .
sparqlBase -> 'BASE' iriref : {base, to_uri_string('$2')} .
base -> '@base' iriref '.' : {base, to_uri_string('$2')} .

triples -> subject predicateObjectList : { '$1', '$2' }.
triples -> blankNodePropertyList predicateObjectList : { '$1', '$2' }.
triples -> blankNodePropertyList : '$1'.
predicateObjectList -> verb objectList : [{'$1', '$2'}] .
predicateObjectList -> verb objectList ';' : [{'$1', '$2'}] .
predicateObjectList -> verb objectList ';' predicateObjectList : [{'$1', '$2'} | '$4'] .
objectList -> object : ['$1'] .
objectList -> object ',' objectList : ['$1' | '$3'] .
blankNodePropertyList -> '[' predicateObjectList ']' : {blankNodePropertyList, '$2'} .
%% The keyword 'a' abbreviates rdf:type.
verb -> 'a' : rdf_type() .
verb -> predicate : '$1' .
subject -> iri : '$1' .
subject -> blankNode : '$1' .
subject -> collection : '$1' .
predicate -> iri : '$1' .
object -> iri : '$1' .
object -> blankNode : '$1' .
object -> collection : '$1' .
object -> blankNodePropertyList : '$1' .
object -> literal : '$1' .
collection -> '(' ')' : {collection, []} .
collection -> '(' collection_elements ')' : {collection, '$2'} .
collection_elements -> object : ['$1'] .
collection_elements -> object collection_elements : ['$1' | '$2'] .
prefixedName -> prefix_ln : '$1' .
prefixedName -> prefix_ns : '$1' .
literal -> rdfLiteral : '$1' .
literal -> numericLiteral : '$1' .
literal -> booleanLiteral : '$1' .
rdfLiteral -> string_literal_quote '^^' iriref : to_literal('$1', {datatype, to_uri('$3')}) .
rdfLiteral -> string_literal_quote langtag : to_literal('$1', {language, to_langtag('$2')}) .
%% The lexer tokenizes the language tags "@prefix"/"@base" as directive
%% tokens; accept them here and map them back to plain language tags.
rdfLiteral -> string_literal_quote '@prefix' : to_literal('$1', {language, to_langtag('$2')}) .
rdfLiteral -> string_literal_quote '@base' : to_literal('$1', {language, to_langtag('$2')}) .
rdfLiteral -> string_literal_quote : to_literal('$1') .
numericLiteral -> integer : to_literal('$1') .
numericLiteral -> decimal : to_literal('$1') .
numericLiteral -> double : to_literal('$1') .
booleanLiteral -> boolean : to_literal('$1') .
iri -> iriref : to_uri('$1') .
iri -> prefixedName : '$1' .
blankNode -> blank_node_label : to_bnode('$1') .
blankNode -> anon : {anon} .

Erlang code.

%% Thin delegations into the Elixir parse helper module.
to_uri_string(IRIREF) -> 'Elixir.RDF.Serialization.ParseHelper':to_uri_string(IRIREF) .
%% Invalid IRIs abort the parse with a positioned error via return_error/2.
to_uri(IRIREF) ->
  case 'Elixir.RDF.Serialization.ParseHelper':to_uri(IRIREF) of
    {ok, URI} -> URI;
    {error, ErrorLine, Message} -> return_error(ErrorLine, Message)
  end.
to_bnode(BLANK_NODE) -> 'Elixir.RDF.Serialization.ParseHelper':to_bnode(BLANK_NODE).
to_literal(STRING_LITERAL_QUOTE) -> 'Elixir.RDF.Serialization.ParseHelper':to_literal(STRING_LITERAL_QUOTE).
to_literal(STRING_LITERAL_QUOTE, Type) -> 'Elixir.RDF.Serialization.ParseHelper':to_literal(STRING_LITERAL_QUOTE, Type).
to_langtag(LANGTAG) -> 'Elixir.RDF.Serialization.ParseHelper':to_langtag(LANGTAG).
rdf_type() -> 'Elixir.RDF.Serialization.ParseHelper':rdf_type().

View file

@ -0,0 +1,303 @@
defmodule RDF.Turtle.DecoderTest do
  use ExUnit.Case, async: false

  doctest RDF.Turtle.Decoder

  import RDF.Sigils

  # Removed the unused `TestData` alias, which produced a compiler warning.
  alias RDF.{Turtle, Graph}
  alias RDF.NS.{XSD}

  use RDF.Vocabulary.Namespace

  # Ad-hoc, non-strict vocabulary namespaces for the example IRIs used below.
  defvocab EX,
    base_uri: "http://example.org/#",
    terms: [], strict: false

  defvocab P,
    base_uri: "http://www.perceive.net/schemas/relationship/",
    terms: [], strict: false

  test "an empty string is deserialized to an empty graph" do
    assert Turtle.Decoder.decode!("") == Graph.new
    assert Turtle.Decoder.decode!("  \n\r\r\n  ") == Graph.new
  end

  test "a single triple with URIs" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Person> <http://xmlns.com/foaf/0.1/name> "Aaron Swartz" .
    """) == Graph.new({EX.Person, ~I<http://xmlns.com/foaf/0.1/name>, "Aaron Swartz"})
  end

  test "decoding a single triple with a blank node" do
    assert Turtle.Decoder.decode!("""
    _:foo <http://example.org/#p> <http://example.org/#O> .
    """) == Graph.new({RDF.bnode("foo"), EX.p, EX.O})
    assert Turtle.Decoder.decode!("""
    <http://example.org/#S> <http://example.org/#p> _:1 .
    """) == Graph.new({EX.S, EX.p, RDF.bnode("1")})
    assert Turtle.Decoder.decode!("""
    _:foo <http://example.org/#p> _:bar .
    """) == Graph.new({RDF.bnode("foo"), EX.p, RDF.bnode("bar")})
  end

  test "decoding a single triple with an untyped string literal" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#spiderman> <http://www.perceive.net/schemas/relationship/realname> "Peter Parker" .
    """) == Graph.new({EX.spiderman, P.realname, RDF.literal("Peter Parker")})
  end

  test "decoding a single triple with an untyped long quoted string literal" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#spiderman> <http://www.perceive.net/schemas/relationship/realname> '''Peter Parker''' .
    """) == Graph.new({EX.spiderman, P.realname, RDF.literal("Peter Parker")})
  end

  test "decoding a single triple with a typed literal" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#spiderman> <http://example.org/#p> "42"^^<http://www.w3.org/2001/XMLSchema#integer> .
    """) == Graph.new({EX.spiderman, EX.p, RDF.literal(42)})
  end

  test "decoding a single triple with a language tagged literal" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#S> <http://example.org/#p> "foo"@en .
    """) == Graph.new({EX.S, EX.p, RDF.literal("foo", language: "en")})
  end

  # "@prefix" and "@base" are valid language tags, although the lexer
  # tokenizes them as directive keywords; ensure they round-trip.
  test "decoding a single triple with a '@prefix' or '@base' language tagged literal" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#S> <http://example.org/#p> "foo"@prefix .
    """) == Graph.new({EX.S, EX.p, RDF.literal("foo", language: "prefix")})
    assert Turtle.Decoder.decode!("""
    <http://example.org/#S> <http://example.org/#p> "foo"@base .
    """) == Graph.new({EX.S, EX.p, RDF.literal("foo", language: "base")})
  end

  test "decoding multiple triples" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#S1> <http://example.org/#p1> <http://example.org/#O1> .
    <http://example.org/#S1> <http://example.org/#p2> <http://example.org/#O2> .
    """) == Graph.new([
      {EX.S1, EX.p1, EX.O1},
      {EX.S1, EX.p2, EX.O2},
    ])
    assert Turtle.Decoder.decode!("""
    <http://example.org/#S1> <http://example.org/#p1> <http://example.org/#O1> .
    <http://example.org/#S1> <http://example.org/#p2> <http://example.org/#O2> .
    <http://example.org/#S2> <http://example.org/#p3> <http://example.org/#O3> .
    """) == Graph.new([
      {EX.S1, EX.p1, EX.O1},
      {EX.S1, EX.p2, EX.O2},
      {EX.S2, EX.p3, EX.O3}
    ])
  end

  test "a statement with the 'a' keyword" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Aaron> a <http://example.org/#Person> .
    """) == Graph.new({EX.Aaron, RDF.type, EX.Person})
  end

  # Anonymous blank nodes get generated labels starting at "b0".
  test "a statement with a blank node via []" do
    assert Turtle.Decoder.decode!("""
    [] <http://xmlns.com/foaf/0.1/name> "Aaron Swartz" .
    """) == Graph.new({RDF.bnode("b0"), ~I<http://xmlns.com/foaf/0.1/name>, "Aaron Swartz"})
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Foo> <http://example.org/#bar> [] .
    """) == Graph.new({EX.Foo, EX.bar, RDF.bnode("b0")})
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Foo> <http://example.org/#bar> [    ] .
    """) == Graph.new({EX.Foo, EX.bar, RDF.bnode("b0")})
  end

  test "a statement with a boolean" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Foo> <http://example.org/#bar> true .
    """) == Graph.new({EX.Foo, EX.bar, RDF.Boolean.new(true)})
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Foo> <http://example.org/#bar> false .
    """) == Graph.new({EX.Foo, EX.bar, RDF.Boolean.new(false)})
  end

  test "a statement with an integer" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Foo> <http://example.org/#bar> 42 .
    """) == Graph.new({EX.Foo, EX.bar, RDF.Integer.new(42)})
  end

  test "a statement with a decimal" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Foo> <http://example.org/#bar> 3.14 .
    """) == Graph.new({EX.Foo, EX.bar, RDF.Literal.new("3.14", datatype: XSD.decimal)})
  end

  test "a statement with a double" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Foo> <http://example.org/#bar> 1.2e3 .
    """) == Graph.new({EX.Foo, EX.bar, RDF.Double.new("1.2e3")})
  end

  test "a statement with multiple objects" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Foo> <http://example.org/#bar> "baz", 1, true .
    """) == Graph.new([
      {EX.Foo, EX.bar, "baz"},
      {EX.Foo, EX.bar, 1},
      {EX.Foo, EX.bar, true},
    ])
  end

  test "a statement with multiple predications" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Foo> <http://example.org/#bar> "baz";
                              <http://example.org/#baz> 42 .
    """) == Graph.new([
      {EX.Foo, EX.bar, "baz"},
      {EX.Foo, EX.baz, 42},
    ])
  end

  test "a statement with a blank node property list on object position" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Foo> <http://example.org/#bar> [ <http://example.org/#baz> 42 ] .
    """) == Graph.new([
      {EX.Foo, EX.bar, RDF.bnode("b0")},
      {RDF.bnode("b0"), EX.baz, 42},
    ])
  end

  test "a statement with a blank node property list on subject position" do
    assert Turtle.Decoder.decode!("""
    [ <http://example.org/#baz> 42 ] <http://example.org/#bar> false .
    """) == Graph.new([
      {RDF.bnode("b0"), EX.baz, 42},
      {RDF.bnode("b0"), EX.bar, false},
    ])
  end

  test "a single blank node property list" do
    assert Turtle.Decoder.decode!("[ <http://example.org/#foo> 42 ] .") ==
      Graph.new([{RDF.bnode("b0"), EX.foo, 42}])
  end

  test "a statement with prefixed names" do
    assert Turtle.Decoder.decode!("""
    @prefix ex: <http://example.org/#> .
    ex:Aaron <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ex:Person .
    """) == Graph.new({EX.Aaron, RDF.type, EX.Person})
    assert Turtle.Decoder.decode!("""
    @prefix ex: <http://example.org/#> .
    ex:Aaron <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ex:Person .
    """) == Graph.new({EX.Aaron, RDF.type, EX.Person})
    assert Turtle.Decoder.decode!("""
    PREFIX ex: <http://example.org/#>
    ex:Aaron <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ex:Person .
    """) == Graph.new({EX.Aaron, RDF.type, EX.Person})
    assert Turtle.Decoder.decode!("""
    prefix ex: <http://example.org/#>
    ex:Aaron <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ex:Person .
    """) == Graph.new({EX.Aaron, RDF.type, EX.Person})
  end

  test "a statement with an empty prefixed name" do
    assert Turtle.Decoder.decode!("""
    @prefix : <http://example.org/#> .
    :Aaron <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> :Person .
    """) == Graph.new({EX.Aaron, RDF.type, EX.Person})
    assert Turtle.Decoder.decode!("""
    PREFIX : <http://example.org/#>
    :Aaron <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> :Person .
    """) == Graph.new({EX.Aaron, RDF.type, EX.Person})
  end

  # Collections expand into rdf:first/rdf:rest chains terminated by rdf:nil.
  test "a statement with a collection" do
    assert Turtle.Decoder.decode!("""
    @prefix : <http://example.org/#> .
    :subject :predicate ( :a :b :c ) .
    """) == Graph.new([
      {EX.subject, EX.predicate, RDF.bnode("b0")},
      {RDF.bnode("b0"), RDF.first, EX.a},
      {RDF.bnode("b0"), RDF.rest, RDF.bnode("b1")},
      {RDF.bnode("b1"), RDF.first, EX.b},
      {RDF.bnode("b1"), RDF.rest, RDF.bnode("b2")},
      {RDF.bnode("b2"), RDF.first, EX.c},
      {RDF.bnode("b2"), RDF.rest, RDF.nil},
    ])
  end

  test "a statement with an empty collection" do
    assert Turtle.Decoder.decode!("""
    @prefix : <http://example.org/#> .
    :subject :predicate () .
    """) == Graph.new({EX.subject, EX.predicate, RDF.nil})
  end

  test "decoding comments" do
    assert Turtle.Decoder.decode!("# just a comment") == Graph.new
    assert Turtle.Decoder.decode!("""
    <http://example.org/#S> <http://example.org/#p> _:1 . # a comment
    """) == Graph.new({EX.S, EX.p, RDF.bnode("1")})
    assert Turtle.Decoder.decode!("""
    # a comment
    <http://example.org/#S> <http://example.org/#p> <http://example.org/#O> .
    """) == Graph.new({EX.S, EX.p, EX.O})
    assert Turtle.Decoder.decode!("""
    <http://example.org/#S> <http://example.org/#p> <http://example.org/#O> .
    # a comment
    """) == Graph.new({EX.S, EX.p, EX.O})
    assert Turtle.Decoder.decode!("""
    # Header line 1
    # Header line 2
    <http://example.org/#S1> <http://example.org/#p1> <http://example.org/#O1> .
    # 1st comment
    <http://example.org/#S1> <http://example.org/#p2> <http://example.org/#O2> . # 2nd comment
    # last comment
    """) == Graph.new([
      {EX.S1, EX.p1, EX.O1},
      {EX.S1, EX.p2, EX.O2},
    ])
  end

  test "empty lines" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#spiderman> <http://www.perceive.net/schemas/relationship/enemyOf> <http://example.org/#green_goblin> .
    """) == Graph.new({EX.spiderman, P.enemyOf, EX.green_goblin})
    assert Turtle.Decoder.decode!("""
    <http://example.org/#spiderman> <http://www.perceive.net/schemas/relationship/enemyOf> <http://example.org/#green_goblin> .
    """) == Graph.new({EX.spiderman, P.enemyOf, EX.green_goblin})
    assert Turtle.Decoder.decode!("""
    <http://example.org/#S1> <http://example.org/#p1> <http://example.org/#O1> .
    <http://example.org/#S1> <http://example.org/#p2> <http://example.org/#O2> .
    """) == Graph.new([
      {EX.S1, EX.p1, EX.O1},
      {EX.S1, EX.p2, EX.O2},
    ])
  end
end