Add Turtle decoder

This commit is contained in:
Marcel Otto 2017-07-05 23:27:55 +02:00
parent 63be10310a
commit 06dbddaf3f
9 changed files with 676 additions and 14 deletions

View file

@ -7,6 +7,10 @@ This project adheres to [Semantic Versioning](http://semver.org/) and
## Unreleased
### Added
- Turtle decoder
### Changed
- Drop support for Elixir versions < 1.4

View file

@ -9,11 +9,10 @@ An implementation of the [RDF](https://www.w3.org/TR/rdf11-primer/) data model i
## Features
- aims to be fully compatible with the RDF 1.1 specification; any incompatibility is considered a bug
- pure Elixir implementation
- fully compatible with the RDF 1.1 specification
- no dependencies
- in-memory data structures for RDF descriptions, RDF graphs and RDF datasets
- support for RDF vocabularies via Elixir modules for safe, i.e. compile-time checked and concise usage of the URIs of vocabularies, resembling QNames
- support for RDF vocabularies via Elixir modules for safe, i.e. compile-time checked and concise usage of URIs
- XML schema datatypes for RDF literals (not yet all supported)
- sigils for the most common types of nodes, i.e. URIs, literals and blank nodes
- a description DSL resembling Turtle in Elixir
@ -118,7 +117,8 @@ iex> uri(RDF.Property)
This way of expressing URIs has the additional benefit, that the existence of the referenced URI is checked at compile time, i.e. whenever a term is used that is not part of the resp. vocabulary an error is raised by the Elixir compiler (unless the vocabulary namespace is non-strict; see below).
For terms not adhering to the capitalization rules (lowercase properties, capitalized non-properties) or containing characters not allowed within atoms, these namespace define aliases accordingly. If unsure, you can have a look at the documentation or the vocabulary namespace definition.
For terms not adhering to the capitalization rules (lowercase properties, capitalized non-properties) or containing characters not allowed within atoms, the predefined namespaces in `RDF.NS` and `RDF.Vocab` define aliases accordingly. If unsure, have a look at the documentation or their definitions.
#### Description DSL
@ -632,15 +632,14 @@ Currently only [JSON-LD] is available with the [JSON-LD.ex] package.
- [Google Group](https://groups.google.com/d/forum/rdfex)
## Development
## TODO
There's still much to do for a complete RDF ecosystem for Elixir, which means there are plenty of opportunities for you to contribute. Here are some suggestions:
- more serialization formats
- [Turtle]
- [RDFa]
- [N3]
- [RDF-XML]
- [N3]
- et al.
- missing XSD datatypes
- more sophisticated query capabilities and full SPARQL support (in the style of Ecto queries)

View file

@ -1,6 +1,13 @@
defmodule RDF.Serialization.ParseHelper do
@moduledoc false
alias RDF.Datatype.NS.XSD
@rdf_type RDF.uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
def rdf_type, do: @rdf_type
# Extracts the raw IRI string from an `iriref` token; the token's line
# number is not needed here, so it is matched with `_line` to avoid an
# unused-variable compiler warning.
def to_uri_string({:iriref, _line, value}), do: value
def to_uri({:iriref, line, value}) do
case URI.parse(value) do
%URI{scheme: nil} -> {:error, line, "#{value} is not a valid URI"}
@ -9,16 +16,33 @@ defmodule RDF.Serialization.ParseHelper do
end
def to_bnode({:blank_node_label, _line, value}), do: RDF.bnode(value)
def to_bnode({:anon, _line}), do: RDF.bnode # TODO:
def to_literal({:string_literal_quote, _line, value}),
do: RDF.literal(value)
def to_literal({:integer, _line, value}), do: RDF.literal(value)
def to_literal({:decimal, _line, value}), do: RDF.literal(value)
def to_literal({:double, _line, value}), do: RDF.literal(value)
def to_literal({:boolean, _line, value}), do: RDF.literal(value)
def to_literal({:string_literal_quote, _line, value}, type),
do: RDF.literal(value, [type])
def to_langtag({:langtag, _line, value}), do: value
def integer(value), do: RDF.Integer.new(List.to_string(value))
def decimal(value), do: RDF.Literal.new(List.to_string(value), datatype: XSD.decimal)
def double(value), do: RDF.Double.new(List.to_string(value))
def boolean('true'), do: true
def boolean('false'), do: false
def bnode_str('_:' ++ value), do: List.to_string(value)
def langtag_str('@' ++ value), do: List.to_string(value)
def quoted_content_str(value), do: value |> List.to_string |> String.slice(1..-2)
# Extracts the language tag from a token.
#
# The lexer tokenizes the literal strings "@prefix" and "@base" as directive
# tokens even when they appear as language tags (e.g. `"foo"@prefix`), so the
# parser routes them back here. These clauses must match on ANY line number:
# the previous code matched the literal `1`, causing a FunctionClauseError
# for such langtags on any line other than the first.
def to_langtag({:langtag, _line, value}), do: value
def to_langtag({:"@prefix", _line}), do: "prefix"
def to_langtag({:"@base", _line}), do: "base"
def bnode_str('_:' ++ value), do: List.to_string(value)
def langtag_str('@' ++ value), do: List.to_string(value)
def quoted_content_str(value), do: value |> List.to_string |> String.slice(1..-2)
def long_quoted_content_str(value), do: value |> List.to_string |> String.slice(3..-4)
def prefix_ns(value), do: value |> List.to_string |> String.slice(0..-2)
def prefix_ln(value), do: value |> List.to_string |> String.split(":", parts: 2) |> List.to_tuple
end

View file

@ -0,0 +1,17 @@
defmodule RDF.Turtle do
  @moduledoc """
  `RDF.Turtle` provides support for reading and writing the Turtle
  serialization format.

  See <https://www.w3.org/TR/turtle/>
  """

  use RDF.Serialization

  import RDF.Sigils

  # Serialization format metadata consumed by the RDF.Serialization
  # machinery: the format's IRI, its default file extension and MIME type.
  @id ~I<http://www.w3.org/ns/formats/Turtle>
  @extension "ttl"
  @content_type "text/turtle"
end

View file

@ -0,0 +1,129 @@
defmodule RDF.Turtle.Decoder do
  @moduledoc false

  # Parser state threaded through graph construction:
  # - base_uri: IRI from the last @base/BASE directive (stored here, but not
  #   read by resolve_node — TODO confirm relative IRI resolution happens
  #   elsewhere or is still pending)
  # - namespaces: prefix string => IRI string, filled by @prefix/PREFIX
  # - bnode_counter: counter used to generate fresh blank node labels
  defmodule State do
    defstruct base_uri: nil, namespaces: %{}, bnode_counter: 0

    # Registers the prefix `ns` as referring to `iri`.
    def add_namespace(%State{namespaces: namespaces} = state, ns, iri) do
      %State{state | namespaces: Map.put(namespaces, ns, iri)}
    end

    # Returns the IRI bound to `prefix`, or nil when the prefix is unknown.
    def ns(%State{namespaces: namespaces}, prefix) do
      namespaces[prefix]
    end

    # Returns {fresh_blank_node, updated_state}; labels are "b0", "b1", ...
    def next_bnode(%State{bnode_counter: bnode_counter} = state) do
      {RDF.bnode("b#{bnode_counter}"),
       %State{state | bnode_counter: bnode_counter + 1}}
    end
  end

  # Decodes a Turtle document string into {:ok, RDF.Graph} or
  # {:error, message}. Pipeline: leex tokenizer -> yecc parser -> AST walk.
  def decode(content, _opts \\ []) do
    with {:ok, tokens, _} <- tokenize(content),
         {:ok, ast} <- parse(tokens) do
      {:ok, build_graph(ast)}
    else
      # The generated lexer reports errors as a 3-tuple, the parser as a
      # 2-tuple; translate both into readable messages with the line number.
      {:error, {error_line, :turtle_lexer, error_descriptor}, _error_line_again} ->
        {:error, "Turtle scanner error on line #{error_line}: #{inspect error_descriptor}"}
      {:error, {error_line, :turtle_parser, error_descriptor}} ->
        {:error, "Turtle parser error on line #{error_line}: #{inspect error_descriptor}"}
    end
  end

  defp tokenize(content), do: content |> to_charlist |> :turtle_lexer.string

  # An empty document produces no tokens; short-circuit to an empty AST
  # instead of handing an empty token list to the generated parser.
  defp parse([]), do: {:ok, []}
  defp parse(tokens), do: tokens |> :turtle_parser.parse

  # Folds the parsed statements into an RDF.Graph, threading State so that
  # directives affect subsequent triples and blank node labels stay unique.
  defp build_graph(ast) do
    {graph, _} =
      Enum.reduce ast, {RDF.Graph.new, %State{}}, fn
        {:triples, triples_ast}, {graph, state} ->
          with {statements, state} = triples(triples_ast, state) do
            {RDF.Graph.add(graph, statements), state}
          end
        {:directive, directive_ast}, {graph, state} ->
          {graph, directive(directive_ast, state)}
      end
    graph
  end

  defp directive({:prefix, {:prefix_ns, _, ns}, iri}, state) do
    State.add_namespace(state, ns, iri)
  end

  defp directive({:base, uri}, state) do
    %State{state | base_uri: uri}
  end

  # A bare blank node property list used as a complete statement: only the
  # statements generated while resolving it are kept (its node is discarded).
  defp triples({:blankNodePropertyList, _} = ast, state) do
    with {_, statements, state} = resolve_node(ast, [], state) do
      {statements, state}
    end
  end

  # Expands {subject, [{predicate, objects}]} into a flat list of statements,
  # resolving prefixed names, blank nodes and collections along the way.
  defp triples({subject, predications}, state) do
    with {subject, statements, state} = resolve_node(subject, [], state) do
      Enum.reduce predications, {statements, state}, fn {predicate, objects}, {statements, state} ->
        with {predicate, statements, state} = resolve_node(predicate, statements, state) do
          Enum.reduce objects, {statements, state}, fn object, {statements, state} ->
            with {object, statements, state} = resolve_node(object, statements, state) do
              {[{subject, predicate, object} | statements], state}
            end
          end
        end
      end
    end
  end

  # resolve_node/3 maps an AST node to an RDF term, possibly emitting
  # auxiliary statements (for property lists and collections).
  # Returns {term, statements, state}.

  # Prefixed name: expand against the namespace registered via @prefix/PREFIX.
  defp resolve_node({:prefix_ln, _, {prefix, name}}, statements, state) do
    {RDF.uri(State.ns(state, prefix) <> name), statements, state}
  end

  # [] — anonymous blank node.
  defp resolve_node({:anon}, statements, state) do
    with {node, state} = State.next_bnode(state) do
      {node, statements, state}
    end
  end

  # [ p o; ... ] — fresh blank node acting as subject of the inner predications.
  defp resolve_node({:blankNodePropertyList, property_list}, statements, state) do
    with {subject, state} = State.next_bnode(state),
         {new_statements, state} = triples({subject, property_list}, state) do
      {subject, statements ++ new_statements, state}
    end
  end

  # () — the empty collection is rdf:nil.
  defp resolve_node({:collection, []}, statements, state) do
    {RDF.nil, statements, state}
  end

  # ( e1 e2 ... ) — build an RDF list: one blank node per element, chained
  # with rdf:first/rdf:rest and terminated by rdf:nil.
  defp resolve_node({:collection, elements}, statements, state) do
    with {first_list_node, state} = State.next_bnode(state),
         [first_element | rest_elements] = elements,
         {first_element_node, statements, state} =
           resolve_node(first_element, statements, state),
         first_statement = [{first_list_node, RDF.first, first_element_node}] do
      {last_list_node, statements, state} =
        Enum.reduce rest_elements, {first_list_node, statements ++ first_statement, state},
          fn element, {list_node, statements, state} ->
            with {element_node, statements, state} =
                   resolve_node(element, statements, state),
                 {next_list_node, state} = State.next_bnode(state) do
              {next_list_node, statements ++ [
                {list_node, RDF.rest, next_list_node},
                {next_list_node, RDF.first, element_node},
              ], state}
            end
          end
      {first_list_node, statements ++ [{last_list_node, RDF.rest, RDF.nil}], state}
    end
  end

  # IRIs, literals and labeled blank nodes arrive already resolved from the
  # parser; pass them through unchanged.
  defp resolve_node(node, statements, state), do: {node, statements, state}
end

View file

@ -6,9 +6,9 @@ EOL = [\n\r]+
HEX = [0-9]|[A-F]|[a-f]
UCHAR = (\\u({HEX})({HEX})({HEX})({HEX}))|(\\U({HEX})({HEX})({HEX})({HEX})({HEX})({HEX})({HEX})({HEX}))
ECHAR = \\[tbnrf"'\\]
PN_CHARS_BASE = [A-Z]|[a-z]|[\x{00C0}-\x{00D6}]|[\x{00D8}-\x{00F6}]|[\x{00F8}-\x{02FF}]|[\x{0370}-\x{037D}]|[\x{037F}-\x{1FFF}]|[\x{200C}-\x{200D}]|[\x{2070}-\x{218F}]|[\x{2C00}-\x{2FEF}]|[\x{3001}-\x{D7FF}]|[\x{F900}-\x{FDCF}]|[\x{FDF0}-\x{FFFD}]|[\x{10000}-\x{EFFFF}]
PN_CHARS_U = {PN_CHARS_BASE}|_|:
PN_CHARS = {PN_CHARS_U}|-|[0-9]|\x{00B7}|[\x{0300}-\x{036F}]|[\x{203F}-\x{2040}]
PN_CHARS_BASE = ([A-Z]|[a-z]|[\x{00C0}-\x{00D6}]|[\x{00D8}-\x{00F6}]|[\x{00F8}-\x{02FF}]|[\x{0370}-\x{037D}]|[\x{037F}-\x{1FFF}]|[\x{200C}-\x{200D}]|[\x{2070}-\x{218F}]|[\x{2C00}-\x{2FEF}]|[\x{3001}-\x{D7FF}]|[\x{F900}-\x{FDCF}]|[\x{FDF0}-\x{FFFD}]|[\x{10000}-\x{EFFFF}])
PN_CHARS_U = ({PN_CHARS_BASE}|_|:)
PN_CHARS = ({PN_CHARS_U}|-|[0-9]|\x{00B7}|[\x{0300}-\x{036F}]|[\x{203F}-\x{2040}])
IRIREF = <([^\x00-\x20<>"{}|^`\\]|{UCHAR})*>
STRING_LITERAL_QUOTE = "([^\x22\x5C\x0A\x0D]|{ECHAR}|{UCHAR})*"
BLANK_NODE_LABEL = _:({PN_CHARS_U}|[0-9])(({PN_CHARS}|\.)*({PN_CHARS}))?

90
src/turtle_lexer.xrl Normal file
View file

@ -0,0 +1,90 @@
%% Tokenizer (leex) for the terminals of the W3C Turtle grammar.
%% Token helper functions delegate to Elixir.RDF.Serialization.ParseHelper.
%% \00=NULL
%% \01-\x1F=control codes
%% \x20=space

Definitions.

COMMENT = #[^\n\r]*
WS = [\s\t\n\r]
ANON = \[{WS}*\]
HEX = [0-9]|[A-F]|[a-f]
UCHAR = (\\u({HEX})({HEX})({HEX})({HEX}))|(\\U({HEX})({HEX})({HEX})({HEX})({HEX})({HEX})({HEX})({HEX}))
ECHAR = \\[tbnrf"'\\]
PERCENT = %{HEX}{HEX}
%% Character classes for prefixed names (PN_* productions of the Turtle spec).
PN_CHARS_BASE = ([A-Z]|[a-z]|[\x{00C0}-\x{00D6}]|[\x{00D8}-\x{00F6}]|[\x{00F8}-\x{02FF}]|[\x{0370}-\x{037D}]|[\x{037F}-\x{1FFF}]|[\x{200C}-\x{200D}]|[\x{2070}-\x{218F}]|[\x{2C00}-\x{2FEF}]|[\x{3001}-\x{D7FF}]|[\x{F900}-\x{FDCF}]|[\x{FDF0}-\x{FFFD}]|[\x{10000}-\x{EFFFF}])
PN_CHARS_U = ({PN_CHARS_BASE}|_)
PN_CHARS = ({PN_CHARS_U}|-|[0-9]|\x{00B7}|[\x{0300}-\x{036F}]|[\x{203F}-\x{2040}])
PN_PREFIX = ({PN_CHARS_BASE}(({PN_CHARS}|\.)*{PN_CHARS})?)
%% NOTE(review): PN_LOCAL references {PLX} before PLX/PN_LOCAL_ESC are
%% defined below — confirm leex resolves forward macro references.
PN_LOCAL = ({PN_CHARS_U}|:|[0-9]|{PLX})(({PN_CHARS}|\.|:|{PLX})*({PN_CHARS}|:|{PLX}))?
PN_LOCAL_ESC = \\(_|\~|\.|\-|\!|\$|\&|\'|\(|\)|\*|\+|\,|\;|\=|\/|\?|\#|\@|\%)
PLX = {PERCENT}|{PN_LOCAL_ESC}
PNAME_NS = {PN_PREFIX}?:
PNAME_LN = {PNAME_NS}{PN_LOCAL}
EXPONENT = ([eE][+-]?[0-9]+)
BOOLEAN = true|false
INTEGER = [+-]?[0-9]+
DECIMAL = [+-]?[0-9]*\.[0-9]+
DOUBLE = [+-]?([0-9]+\.[0-9]*{EXPONENT}|\.[0-9]+{EXPONENT}|[0-9]+{EXPONENT})
IRIREF = <([^\x00-\x20<>"{}|^`\\]|{UCHAR})*>
STRING_LITERAL_QUOTE = "([^"\\\n\r]|{ECHAR}|{UCHAR})*"
STRING_LITERAL_SINGLE_QUOTE = '([^'\\\n\r]|{ECHAR}|{UCHAR})*'
STRING_LITERAL_LONG_SINGLE_QUOTE = '''(('|'')?([^'\\]|{ECHAR}|{UCHAR}))*'''
STRING_LITERAL_LONG_QUOTE = """(("|"")?([^"\\]|{ECHAR}|{UCHAR}))*"""
BLANK_NODE_LABEL = _:({PN_CHARS_U}|[0-9])(({PN_CHARS}|\.)*({PN_CHARS}))?
LANGTAG = @[a-zA-Z]+(-[a-zA-Z0-9]+)*
%% Case-insensitive SPARQL-style directive keywords.
BASE = [Bb][Aa][Ss][Ee]
PREFIX = [Pp][Rr][Ee][Ff][Ii][Xx]

Rules.

%% leex picks the longest match; rule order only breaks ties, so e.g. a
%% long-quoted string still wins over STRING_LITERAL_QUOTE listed earlier.
@prefix : {token, {'@prefix', TokenLine}}.
@base : {token, {'@base', TokenLine}}.
{BASE} : {token, {'BASE', TokenLine}}.
{PREFIX} : {token, {'PREFIX', TokenLine}}.
{LANGTAG} : {token, {langtag, TokenLine, langtag_str(TokenChars)}}.
{IRIREF} : {token, {iriref, TokenLine, quoted_content_str(TokenChars)}}.
{DOUBLE} : {token, {double, TokenLine, double(TokenChars)}}.
{DECIMAL} : {token, {decimal, TokenLine, decimal(TokenChars)}}.
{INTEGER} : {token, {integer, TokenLine, integer(TokenChars)}}.
{BOOLEAN} : {token, {boolean, TokenLine, boolean(TokenChars)}}.
%% All four string forms collapse to the single string_literal_quote token.
{STRING_LITERAL_SINGLE_QUOTE} : {token, {string_literal_quote, TokenLine, quoted_content_str(TokenChars)}}.
{STRING_LITERAL_QUOTE} : {token, {string_literal_quote, TokenLine, quoted_content_str(TokenChars)}}.
{STRING_LITERAL_LONG_SINGLE_QUOTE} : {token, {string_literal_quote, TokenLine, long_quoted_content_str(TokenChars)}}.
{STRING_LITERAL_LONG_QUOTE} : {token, {string_literal_quote, TokenLine, long_quoted_content_str(TokenChars)}}.
{BLANK_NODE_LABEL} : {token, {blank_node_label, TokenLine, bnode_str(TokenChars)}}.
{ANON} : {token, {anon, TokenLine}}.
a : {token, {'a', TokenLine}}.
{PNAME_NS} : {token, {prefix_ns, TokenLine, prefix_ns(TokenChars)}}.
{PNAME_LN} : {token, {prefix_ln, TokenLine, prefix_ln(TokenChars)}}.
; : {token, {';', TokenLine}}.
, : {token, {',', TokenLine}}.
\. : {token, {'.', TokenLine}}.
\[ : {token, {'[', TokenLine}}.
\] : {token, {']', TokenLine}}.
\( : {token, {'(', TokenLine}}.
\) : {token, {')', TokenLine}}.
\^\^ : {token, {'^^', TokenLine}}.
{WS}+ : skip_token.
{COMMENT} : skip_token.

Erlang code.

%% Thin delegations into the Elixir parse helper module.
integer(TokenChars) -> 'Elixir.RDF.Serialization.ParseHelper':integer(TokenChars).
decimal(TokenChars) -> 'Elixir.RDF.Serialization.ParseHelper':decimal(TokenChars).
double(TokenChars) -> 'Elixir.RDF.Serialization.ParseHelper':double(TokenChars).
boolean(TokenChars) -> 'Elixir.RDF.Serialization.ParseHelper':boolean(TokenChars).
quoted_content_str(TokenChars) -> 'Elixir.RDF.Serialization.ParseHelper':quoted_content_str(TokenChars).
long_quoted_content_str(TokenChars) -> 'Elixir.RDF.Serialization.ParseHelper':long_quoted_content_str(TokenChars).
bnode_str(TokenChars) -> 'Elixir.RDF.Serialization.ParseHelper':bnode_str(TokenChars).
langtag_str(TokenChars) -> 'Elixir.RDF.Serialization.ParseHelper':langtag_str(TokenChars).
prefix_ns(TokenChars) -> 'Elixir.RDF.Serialization.ParseHelper':prefix_ns(TokenChars).
prefix_ln(TokenChars) -> 'Elixir.RDF.Serialization.ParseHelper':prefix_ln(TokenChars).

96
src/turtle_parser.yrl Normal file
View file

@ -0,0 +1,96 @@
%% Grammar (yecc) for Turtle as specified in
%% https://www.w3.org/TR/2014/REC-turtle-20140225/
%% (the previous comment mistakenly pointed at the N-Triples spec)

Nonterminals turtleDoc statement directive prefixID base sparqlPrefix sparqlBase
triples predicateObjectList objectList blankNodePropertyList
verb subject predicate object collection collection_elements
literal numericLiteral rdfLiteral booleanLiteral iri prefixedName blankNode.

Terminals prefix_ns prefix_ln iriref blank_node_label anon
string_literal_quote langtag integer decimal double boolean
'.' ';' ',' '[' ']' '(' ')' '^^' '@prefix' '@base' 'PREFIX' 'BASE' 'a' .

Rootsymbol turtleDoc.

%% A document is a non-empty sequence of statements (the decoder handles
%% the empty-token-list case before calling the parser).
turtleDoc -> statement : ['$1'] .
turtleDoc -> statement turtleDoc : ['$1' | '$2'] .
statement -> directive : {directive, '$1'} .
statement -> triples '.' : {triples, '$1'} .

%% Both Turtle (@prefix/@base, dot-terminated) and SPARQL-style
%% (PREFIX/BASE, no dot) directives are accepted.
directive -> prefixID : '$1' .
directive -> sparqlPrefix : '$1' .
directive -> base : '$1' .
directive -> sparqlBase : '$1' .
prefixID -> '@prefix' prefix_ns iriref '.' : {prefix, '$2', to_uri_string('$3')} .
sparqlPrefix -> 'PREFIX' prefix_ns iriref : {prefix, '$2', to_uri_string('$3')} .
sparqlBase -> 'BASE' iriref : {base, to_uri_string('$2')} .
base -> '@base' iriref '.' : {base, to_uri_string('$2')} .

triples -> subject predicateObjectList : { '$1', '$2' }.
triples -> blankNodePropertyList predicateObjectList : { '$1', '$2' }.
triples -> blankNodePropertyList : '$1'.
predicateObjectList -> verb objectList : [{'$1', '$2'}] .
predicateObjectList -> verb objectList ';' : [{'$1', '$2'}] .
predicateObjectList -> verb objectList ';' predicateObjectList : [{'$1', '$2'} | '$4'] .
objectList -> object : ['$1'] .
objectList -> object ',' objectList : ['$1' | '$3'] .
blankNodePropertyList -> '[' predicateObjectList ']' : {blankNodePropertyList, '$2'} .
%% The keyword 'a' abbreviates rdf:type.
verb -> 'a' : rdf_type() .
verb -> predicate : '$1' .
subject -> iri : '$1' .
subject -> blankNode : '$1' .
subject -> collection : '$1' .
predicate -> iri : '$1' .
object -> iri : '$1' .
object -> blankNode : '$1' .
object -> collection : '$1' .
object -> blankNodePropertyList : '$1' .
object -> literal : '$1' .
collection -> '(' ')' : {collection, []} .
collection -> '(' collection_elements ')' : {collection, '$2'} .
collection_elements -> object : ['$1'] .
collection_elements -> object collection_elements : ['$1' | '$2'] .
prefixedName -> prefix_ln : '$1' .
prefixedName -> prefix_ns : '$1' .
literal -> rdfLiteral : '$1' .
literal -> numericLiteral : '$1' .
literal -> booleanLiteral : '$1' .
rdfLiteral -> string_literal_quote '^^' iriref : to_literal('$1', {datatype, to_uri('$3')}) .
rdfLiteral -> string_literal_quote langtag : to_literal('$1', {language, to_langtag('$2')}) .
%% The lexer tokenizes the language tags "@prefix"/"@base" as directive
%% tokens; accept them here and map them back to plain language tags.
rdfLiteral -> string_literal_quote '@prefix' : to_literal('$1', {language, to_langtag('$2')}) .
rdfLiteral -> string_literal_quote '@base' : to_literal('$1', {language, to_langtag('$2')}) .
rdfLiteral -> string_literal_quote : to_literal('$1') .
numericLiteral -> integer : to_literal('$1') .
numericLiteral -> decimal : to_literal('$1') .
numericLiteral -> double : to_literal('$1') .
booleanLiteral -> boolean : to_literal('$1') .
iri -> iriref : to_uri('$1') .
iri -> prefixedName : '$1' .
blankNode -> blank_node_label : to_bnode('$1') .
blankNode -> anon : {anon} .

Erlang code.

%% Thin delegations into the Elixir parse helper module.
to_uri_string(IRIREF) -> 'Elixir.RDF.Serialization.ParseHelper':to_uri_string(IRIREF) .
%% Invalid IRIs abort the parse with a positioned error via return_error/2.
to_uri(IRIREF) ->
  case 'Elixir.RDF.Serialization.ParseHelper':to_uri(IRIREF) of
    {ok, URI} -> URI;
    {error, ErrorLine, Message} -> return_error(ErrorLine, Message)
  end.
to_bnode(BLANK_NODE) -> 'Elixir.RDF.Serialization.ParseHelper':to_bnode(BLANK_NODE).
to_literal(STRING_LITERAL_QUOTE) -> 'Elixir.RDF.Serialization.ParseHelper':to_literal(STRING_LITERAL_QUOTE).
to_literal(STRING_LITERAL_QUOTE, Type) -> 'Elixir.RDF.Serialization.ParseHelper':to_literal(STRING_LITERAL_QUOTE, Type).
to_langtag(LANGTAG) -> 'Elixir.RDF.Serialization.ParseHelper':to_langtag(LANGTAG).
rdf_type() -> 'Elixir.RDF.Serialization.ParseHelper':rdf_type().

View file

@ -0,0 +1,303 @@
defmodule RDF.Turtle.DecoderTest do
  use ExUnit.Case, async: false

  doctest RDF.Turtle.Decoder

  import RDF.Sigils

  # Removed the unused `TestData` alias, which produced a compiler warning.
  alias RDF.{Turtle, Graph}
  alias RDF.NS.{XSD}

  use RDF.Vocabulary.Namespace

  # Ad-hoc, non-strict vocabulary namespaces for the example IRIs used below.
  defvocab EX,
    base_uri: "http://example.org/#",
    terms: [], strict: false

  defvocab P,
    base_uri: "http://www.perceive.net/schemas/relationship/",
    terms: [], strict: false

  test "an empty string is deserialized to an empty graph" do
    assert Turtle.Decoder.decode!("") == Graph.new
    assert Turtle.Decoder.decode!("  \n\r\r\n  ") == Graph.new
  end

  test "a single triple with URIs" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Person> <http://xmlns.com/foaf/0.1/name> "Aaron Swartz" .
    """) == Graph.new({EX.Person, ~I<http://xmlns.com/foaf/0.1/name>, "Aaron Swartz"})
  end

  test "decoding a single triple with a blank node" do
    assert Turtle.Decoder.decode!("""
    _:foo <http://example.org/#p> <http://example.org/#O> .
    """) == Graph.new({RDF.bnode("foo"), EX.p, EX.O})
    assert Turtle.Decoder.decode!("""
    <http://example.org/#S> <http://example.org/#p> _:1 .
    """) == Graph.new({EX.S, EX.p, RDF.bnode("1")})
    assert Turtle.Decoder.decode!("""
    _:foo <http://example.org/#p> _:bar .
    """) == Graph.new({RDF.bnode("foo"), EX.p, RDF.bnode("bar")})
  end

  test "decoding a single triple with an untyped string literal" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#spiderman> <http://www.perceive.net/schemas/relationship/realname> "Peter Parker" .
    """) == Graph.new({EX.spiderman, P.realname, RDF.literal("Peter Parker")})
  end

  test "decoding a single triple with an untyped long quoted string literal" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#spiderman> <http://www.perceive.net/schemas/relationship/realname> '''Peter Parker''' .
    """) == Graph.new({EX.spiderman, P.realname, RDF.literal("Peter Parker")})
  end

  test "decoding a single triple with a typed literal" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#spiderman> <http://example.org/#p> "42"^^<http://www.w3.org/2001/XMLSchema#integer> .
    """) == Graph.new({EX.spiderman, EX.p, RDF.literal(42)})
  end

  test "decoding a single triple with a language tagged literal" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#S> <http://example.org/#p> "foo"@en .
    """) == Graph.new({EX.S, EX.p, RDF.literal("foo", language: "en")})
  end

  # "@prefix" and "@base" are valid language tags, although the lexer
  # tokenizes them as directive keywords; ensure they round-trip.
  test "decoding a single triple with a '@prefix' or '@base' language tagged literal" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#S> <http://example.org/#p> "foo"@prefix .
    """) == Graph.new({EX.S, EX.p, RDF.literal("foo", language: "prefix")})
    assert Turtle.Decoder.decode!("""
    <http://example.org/#S> <http://example.org/#p> "foo"@base .
    """) == Graph.new({EX.S, EX.p, RDF.literal("foo", language: "base")})
  end

  test "decoding multiple triples" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#S1> <http://example.org/#p1> <http://example.org/#O1> .
    <http://example.org/#S1> <http://example.org/#p2> <http://example.org/#O2> .
    """) == Graph.new([
      {EX.S1, EX.p1, EX.O1},
      {EX.S1, EX.p2, EX.O2},
    ])
    assert Turtle.Decoder.decode!("""
    <http://example.org/#S1> <http://example.org/#p1> <http://example.org/#O1> .
    <http://example.org/#S1> <http://example.org/#p2> <http://example.org/#O2> .
    <http://example.org/#S2> <http://example.org/#p3> <http://example.org/#O3> .
    """) == Graph.new([
      {EX.S1, EX.p1, EX.O1},
      {EX.S1, EX.p2, EX.O2},
      {EX.S2, EX.p3, EX.O3}
    ])
  end

  test "a statement with the 'a' keyword" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Aaron> a <http://example.org/#Person> .
    """) == Graph.new({EX.Aaron, RDF.type, EX.Person})
  end

  # Anonymous blank nodes get generated labels starting at "b0".
  test "a statement with a blank node via []" do
    assert Turtle.Decoder.decode!("""
    [] <http://xmlns.com/foaf/0.1/name> "Aaron Swartz" .
    """) == Graph.new({RDF.bnode("b0"), ~I<http://xmlns.com/foaf/0.1/name>, "Aaron Swartz"})
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Foo> <http://example.org/#bar> [] .
    """) == Graph.new({EX.Foo, EX.bar, RDF.bnode("b0")})
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Foo> <http://example.org/#bar> [    ] .
    """) == Graph.new({EX.Foo, EX.bar, RDF.bnode("b0")})
  end

  test "a statement with a boolean" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Foo> <http://example.org/#bar> true .
    """) == Graph.new({EX.Foo, EX.bar, RDF.Boolean.new(true)})
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Foo> <http://example.org/#bar> false .
    """) == Graph.new({EX.Foo, EX.bar, RDF.Boolean.new(false)})
  end

  test "a statement with an integer" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Foo> <http://example.org/#bar> 42 .
    """) == Graph.new({EX.Foo, EX.bar, RDF.Integer.new(42)})
  end

  test "a statement with a decimal" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Foo> <http://example.org/#bar> 3.14 .
    """) == Graph.new({EX.Foo, EX.bar, RDF.Literal.new("3.14", datatype: XSD.decimal)})
  end

  test "a statement with a double" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Foo> <http://example.org/#bar> 1.2e3 .
    """) == Graph.new({EX.Foo, EX.bar, RDF.Double.new("1.2e3")})
  end

  test "a statement with multiple objects" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Foo> <http://example.org/#bar> "baz", 1, true .
    """) == Graph.new([
      {EX.Foo, EX.bar, "baz"},
      {EX.Foo, EX.bar, 1},
      {EX.Foo, EX.bar, true},
    ])
  end

  test "a statement with multiple predications" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Foo> <http://example.org/#bar> "baz";
                              <http://example.org/#baz> 42 .
    """) == Graph.new([
      {EX.Foo, EX.bar, "baz"},
      {EX.Foo, EX.baz, 42},
    ])
  end

  test "a statement with a blank node property list on object position" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#Foo> <http://example.org/#bar> [ <http://example.org/#baz> 42 ] .
    """) == Graph.new([
      {EX.Foo, EX.bar, RDF.bnode("b0")},
      {RDF.bnode("b0"), EX.baz, 42},
    ])
  end

  test "a statement with a blank node property list on subject position" do
    assert Turtle.Decoder.decode!("""
    [ <http://example.org/#baz> 42 ] <http://example.org/#bar> false .
    """) == Graph.new([
      {RDF.bnode("b0"), EX.baz, 42},
      {RDF.bnode("b0"), EX.bar, false},
    ])
  end

  test "a single blank node property list" do
    assert Turtle.Decoder.decode!("[ <http://example.org/#foo> 42 ] .") ==
      Graph.new([{RDF.bnode("b0"), EX.foo, 42}])
  end

  test "a statement with prefixed names" do
    assert Turtle.Decoder.decode!("""
    @prefix ex: <http://example.org/#> .
    ex:Aaron <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ex:Person .
    """) == Graph.new({EX.Aaron, RDF.type, EX.Person})
    assert Turtle.Decoder.decode!("""
    @prefix ex: <http://example.org/#> .
    ex:Aaron <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ex:Person .
    """) == Graph.new({EX.Aaron, RDF.type, EX.Person})
    assert Turtle.Decoder.decode!("""
    PREFIX ex: <http://example.org/#>
    ex:Aaron <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ex:Person .
    """) == Graph.new({EX.Aaron, RDF.type, EX.Person})
    assert Turtle.Decoder.decode!("""
    prefix ex: <http://example.org/#>
    ex:Aaron <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ex:Person .
    """) == Graph.new({EX.Aaron, RDF.type, EX.Person})
  end

  test "a statement with an empty prefixed name" do
    assert Turtle.Decoder.decode!("""
    @prefix : <http://example.org/#> .
    :Aaron <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> :Person .
    """) == Graph.new({EX.Aaron, RDF.type, EX.Person})
    assert Turtle.Decoder.decode!("""
    PREFIX : <http://example.org/#>
    :Aaron <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> :Person .
    """) == Graph.new({EX.Aaron, RDF.type, EX.Person})
  end

  # Collections expand into rdf:first/rdf:rest chains terminated by rdf:nil.
  test "a statement with a collection" do
    assert Turtle.Decoder.decode!("""
    @prefix : <http://example.org/#> .
    :subject :predicate ( :a :b :c ) .
    """) == Graph.new([
      {EX.subject, EX.predicate, RDF.bnode("b0")},
      {RDF.bnode("b0"), RDF.first, EX.a},
      {RDF.bnode("b0"), RDF.rest, RDF.bnode("b1")},
      {RDF.bnode("b1"), RDF.first, EX.b},
      {RDF.bnode("b1"), RDF.rest, RDF.bnode("b2")},
      {RDF.bnode("b2"), RDF.first, EX.c},
      {RDF.bnode("b2"), RDF.rest, RDF.nil},
    ])
  end

  test "a statement with an empty collection" do
    assert Turtle.Decoder.decode!("""
    @prefix : <http://example.org/#> .
    :subject :predicate () .
    """) == Graph.new({EX.subject, EX.predicate, RDF.nil})
  end

  test "decoding comments" do
    assert Turtle.Decoder.decode!("# just a comment") == Graph.new
    assert Turtle.Decoder.decode!("""
    <http://example.org/#S> <http://example.org/#p> _:1 . # a comment
    """) == Graph.new({EX.S, EX.p, RDF.bnode("1")})
    assert Turtle.Decoder.decode!("""
    # a comment
    <http://example.org/#S> <http://example.org/#p> <http://example.org/#O> .
    """) == Graph.new({EX.S, EX.p, EX.O})
    assert Turtle.Decoder.decode!("""
    <http://example.org/#S> <http://example.org/#p> <http://example.org/#O> .
    # a comment
    """) == Graph.new({EX.S, EX.p, EX.O})
    assert Turtle.Decoder.decode!("""
    # Header line 1
    # Header line 2
    <http://example.org/#S1> <http://example.org/#p1> <http://example.org/#O1> .
    # 1st comment
    <http://example.org/#S1> <http://example.org/#p2> <http://example.org/#O2> . # 2nd comment
    # last comment
    """) == Graph.new([
      {EX.S1, EX.p1, EX.O1},
      {EX.S1, EX.p2, EX.O2},
    ])
  end

  test "empty lines" do
    assert Turtle.Decoder.decode!("""
    <http://example.org/#spiderman> <http://www.perceive.net/schemas/relationship/enemyOf> <http://example.org/#green_goblin> .
    """) == Graph.new({EX.spiderman, P.enemyOf, EX.green_goblin})
    assert Turtle.Decoder.decode!("""
    <http://example.org/#spiderman> <http://www.perceive.net/schemas/relationship/enemyOf> <http://example.org/#green_goblin> .
    """) == Graph.new({EX.spiderman, P.enemyOf, EX.green_goblin})
    assert Turtle.Decoder.decode!("""
    <http://example.org/#S1> <http://example.org/#p1> <http://example.org/#O1> .
    <http://example.org/#S1> <http://example.org/#p2> <http://example.org/#O2> .
    """) == Graph.new([
      {EX.S1, EX.p1, EX.O1},
      {EX.S1, EX.p2, EX.O2},
    ])
  end
end