Add RDF.NQuads.Decoder.decode_from_stream/2

This commit is contained in:
Marcel Otto 2020-11-04 16:44:53 +01:00
parent b4f0ae074c
commit 25b7239843
2 changed files with 82 additions and 31 deletions

View file

@ -9,18 +9,39 @@ defmodule RDF.NQuads.Decoder do
# Decodes an N-Quads document given as a string.
#
# Returns `{:ok, dataset}` on success; a failing step of the `with` is
# returned unchanged, which per the spec is an `{:error, reason}` tuple.
# The options argument is accepted but not used.
@impl RDF.Serialization.Decoder
@spec decode(String.t(), keyword) :: {:ok, Dataset.t()} | {:error, any}
def decode(content, _opts \\ []) do
with {:ok, tokens, _} <- tokenize(content),
{:ok, ast} <- parse(tokens) do
def decode(string, _opts \\ []) do
with {:ok, ast} <- do_decode(string, true) do
{:ok, build_dataset(ast)}
end
end
@doc """
Decodes an N-Quads stream into a dataset, one stream element at a time.

Every element of `stream` is tokenized and parsed on its own, and the
statements it yields are added to the dataset accumulated so far. Elements
that yield no statement are skipped. An element that carries more than one
statement is added completely rather than being rejected.

The options argument is accepted but currently unused.

Raises a `RuntimeError` carrying the decoder's error message as soon as an
element cannot be tokenized or parsed.
"""
@impl RDF.Serialization.Decoder
@spec decode_from_stream(Enumerable.t(), keyword) :: Dataset.t()
def decode_from_stream(stream, _opts \\ []) do
  Enum.reduce(stream, Dataset.new(), fn line, dataset ->
    # Line numbers are switched off: each element is decoded in isolation,
    # so the lexer/parser line would always refer to the element itself.
    case do_decode(line, false) do
      {:ok, []} ->
        dataset

      # The parser wraps all statements of its input in a single inner list
      # (the same shape build_dataset/1 consumes), so fold every one of them
      # in instead of insisting on exactly one statement per element.
      {:ok, [quads]} ->
        Enum.reduce(quads, dataset, &Dataset.add(&2, &1))

      {:error, error} ->
        raise error
    end
  end)
end
# Tokenizes `content` and hands the tokens to the parser.
#
# On success the parser's result is returned as-is. Lexer and parser failures
# are rewritten into `{:error, message}` with a human-readable string; the
# " on line N" part of that message is only included when
# `error_with_line_number` is truthy.
defp do_decode(content, error_with_line_number) do
with {:ok, tokens, _} <- tokenize(content) do
parse(tokens)
else
{:error, {error_line, :ntriples_lexer, error_descriptor}, _error_line_again} ->
{:error,
"N-Quad scanner error on line #{error_line}: #{error_description(error_descriptor)}"}
"N-Quad scanner error#{if error_with_line_number, do: " on line #{error_line}"}: #{
error_description(error_descriptor)
}"}
{:error, {error_line, :nquads_parser, error_descriptor}} ->
{:error,
"N-Quad parser error on line #{error_line}: #{error_description(error_descriptor)}"}
"N-Quad parser error#{if error_with_line_number, do: " on line #{error_line}"}: #{
error_description(error_descriptor)
}"}
end
end
@ -28,7 +49,9 @@ defmodule RDF.NQuads.Decoder do
# Feeds the token list to the `:nquads_parser` module and returns its result.
defp parse(tokens), do: :nquads_parser.parse(tokens)
# Turns the parser result into a dataset.
#
# An empty result gives an empty dataset; otherwise the result is a
# one-element list whose element is the list of statements, each of which is
# added to a fresh dataset in turn.
defp build_dataset(ast) do
Enum.reduce(ast, Dataset.new(), &Dataset.add(&2, &1))
defp build_dataset([]), do: Dataset.new()
defp build_dataset([quads]) do
Enum.reduce(quads, Dataset.new(), &Dataset.add(&2, &1))
end
end

View file

@ -3,6 +3,7 @@ defmodule RDF.NQuads.DecoderTest do
doctest RDF.NQuads.Decoder
alias RDF.NQuads.Decoder
alias RDF.Dataset
import RDF.Sigils
@ -13,29 +14,36 @@ defmodule RDF.NQuads.DecoderTest do
defvocab P, base_iri: "http://www.perceive.net/schemas/relationship/", terms: [], strict: false
import RDF.Sigils
import RDF.Test.Case, only: [string_to_stream: 1]
# The decoder must report that it can decode from streams.
test "stream_support?/0", do: assert(Decoder.stream_support?())
test "an empty string is deserialized to an empty graph" do
assert RDF.NQuads.Decoder.decode!("") == Dataset.new()
assert RDF.NQuads.Decoder.decode!(" \n\r\r\n ") == Dataset.new()
assert Decoder.decode!("") == Dataset.new()
assert Decoder.decode!(" \n\r\r\n ") == Dataset.new()
end
test "decoding comments" do
assert RDF.NQuads.Decoder.decode!("# just a comment") == Dataset.new()
assert Decoder.decode!("# just a comment") == Dataset.new()
assert RDF.NQuads.Decoder.decode!("""
assert Decoder.decode!("""
<http://example.org/#S> <http://example.org/#p> _:1 <http://example.org/#G>. # a comment
""") == Dataset.new({EX.S, EX.p(), RDF.bnode("1"), EX.G})
assert RDF.NQuads.Decoder.decode!("""
assert Decoder.decode!("""
# a comment
<http://example.org/#S> <http://example.org/#p> <http://example.org/#O> <http://example.org/#G>.
""") == Dataset.new({EX.S, EX.p(), EX.O, EX.G})
assert RDF.NQuads.Decoder.decode!("""
assert Decoder.decode!("""
<http://example.org/#S> <http://example.org/#p> <http://example.org/#O> <http://example.org/#G>.
# a comment
""") == Dataset.new({EX.S, EX.p(), EX.O, EX.G})
assert RDF.NQuads.Decoder.decode!("""
assert Decoder.decode!("""
# Header line 1
# Header line 2
<http://example.org/#S1> <http://example.org/#p1> <http://example.org/#O1> <http://example.org/#G> .
@ -50,7 +58,7 @@ defmodule RDF.NQuads.DecoderTest do
end
test "empty lines" do
assert RDF.NQuads.Decoder.decode!("""
assert Decoder.decode!("""
<http://example.org/#spiderman> <http://www.perceive.net/schemas/relationship/enemyOf> <http://example.org/#green_goblin> <http://example.org/graphs/spiderman> .
""") ==
@ -59,7 +67,7 @@ defmodule RDF.NQuads.DecoderTest do
~I<http://example.org/graphs/spiderman>}
)
assert RDF.NQuads.Decoder.decode!("""
assert Decoder.decode!("""
<http://example.org/#spiderman> <http://www.perceive.net/schemas/relationship/enemyOf> <http://example.org/#green_goblin> <http://example.org/graphs/spiderman> .
""") ==
@ -68,7 +76,7 @@ defmodule RDF.NQuads.DecoderTest do
~I<http://example.org/graphs/spiderman>}
)
assert RDF.NQuads.Decoder.decode!("""
assert Decoder.decode!("""
<http://example.org/#S1> <http://example.org/#p1> <http://example.org/#O1> .
@ -83,11 +91,11 @@ defmodule RDF.NQuads.DecoderTest do
end
test "decoding a single statement with iris" do
assert RDF.NQuads.Decoder.decode!("""
assert Decoder.decode!("""
<http://example.org/#spiderman> <http://www.perceive.net/schemas/relationship/enemyOf> <http://example.org/#green_goblin> .
""") == Dataset.new({EX.spiderman(), P.enemyOf(), EX.green_goblin()})
assert RDF.NQuads.Decoder.decode!("""
assert Decoder.decode!("""
<http://example.org/#spiderman> <http://www.perceive.net/schemas/relationship/enemyOf> <http://example.org/#green_goblin> <http://example.org/graphs/spiderman>.
""") ==
Dataset.new(
@ -97,55 +105,55 @@ defmodule RDF.NQuads.DecoderTest do
end
test "decoding a single statement with a blank node" do
assert RDF.NQuads.Decoder.decode!("""
assert Decoder.decode!("""
_:foo <http://example.org/#p> <http://example.org/#O> <http://example.org/#G> .
""") == Dataset.new({RDF.bnode("foo"), EX.p(), EX.O, EX.G})
assert RDF.NQuads.Decoder.decode!("""
assert Decoder.decode!("""
<http://example.org/#S> <http://example.org/#p> _:1 <http://example.org/#G> .
""") == Dataset.new({EX.S, EX.p(), RDF.bnode("1"), EX.G})
assert RDF.NQuads.Decoder.decode!("""
assert Decoder.decode!("""
_:foo <http://example.org/#p> _:bar <http://example.org/#G> .
""") == Dataset.new({RDF.bnode("foo"), EX.p(), RDF.bnode("bar"), EX.G})
assert RDF.NQuads.Decoder.decode!("""
assert Decoder.decode!("""
<http://example.org/#S> <http://example.org/#p> _:1 _:G .
""") == Dataset.new({EX.S, EX.p(), RDF.bnode("1"), RDF.bnode("G")})
end
test "decoding a single statement with an untyped string literal" do
assert RDF.NQuads.Decoder.decode!("""
assert Decoder.decode!("""
<http://example.org/#spiderman> <http://www.perceive.net/schemas/relationship/realname> "Peter Parker" <http://example.org/#G> .
""") == Dataset.new({EX.spiderman(), P.realname(), RDF.literal("Peter Parker"), EX.G})
assert RDF.NQuads.Decoder.decode!("""
assert Decoder.decode!("""
<http://example.org/#spiderman> <http://www.perceive.net/schemas/relationship/realname> "Peter Parker" .
""") == Dataset.new({EX.spiderman(), P.realname(), RDF.literal("Peter Parker")})
end
test "decoding a single statement with a typed literal" do
assert RDF.NQuads.Decoder.decode!("""
assert Decoder.decode!("""
<http://example.org/#spiderman> <http://example.org/#p> "42"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/#G> .
""") == Dataset.new({EX.spiderman(), EX.p(), RDF.literal(42), EX.G})
assert RDF.NQuads.Decoder.decode!("""
assert Decoder.decode!("""
<http://example.org/#spiderman> <http://example.org/#p> "42"^^<http://www.w3.org/2001/XMLSchema#integer> .
""") == Dataset.new({EX.spiderman(), EX.p(), RDF.literal(42)})
end
test "decoding a single statement with a language tagged literal" do
assert RDF.NQuads.Decoder.decode!("""
assert Decoder.decode!("""
<http://example.org/#S> <http://example.org/#p> "foo"@en <http://example.org/#G> .
""") == Dataset.new({EX.S, EX.p(), RDF.literal("foo", language: "en"), EX.G})
assert RDF.NQuads.Decoder.decode!("""
assert Decoder.decode!("""
<http://example.org/#S> <http://example.org/#p> "foo"@en .
""") == Dataset.new({EX.S, EX.p(), RDF.literal("foo", language: "en")})
end
test "decoding multiple statements" do
assert RDF.NQuads.Decoder.decode!("""
assert Decoder.decode!("""
<http://example.org/#S1> <http://example.org/#p1> <http://example.org/#O1> <http://example.org/#G> .
<http://example.org/#S1> <http://example.org/#p2> <http://example.org/#O2> <http://example.org/#G> .
""") ==
@ -154,10 +162,11 @@ defmodule RDF.NQuads.DecoderTest do
{EX.S1, EX.p2(), EX.O2, EX.G}
])
assert RDF.NQuads.Decoder.decode!("""
assert Decoder.decode!("""
<http://example.org/#S1> <http://example.org/#p1> <http://example.org/#O1> <http://example.org/#G> .
<http://example.org/#S1> <http://example.org/#p2> <http://example.org/#O2> <http://example.org/#G> .
<http://example.org/#S2> <http://example.org/#p3> <http://example.org/#O3> <http://example.org/#G> .
<http://example.org/#S2> <http://example.org/#p3> <http://example.org/#O3> .
""") ==
Dataset.new([
@ -167,4 +176,23 @@ defmodule RDF.NQuads.DecoderTest do
{EX.S2, EX.p3(), EX.O3}
])
end
test "decode_from_stream/2" do
  # Four statements: IRI, blank node and language-tagged literal objects,
  # three of them in a named graph and one without a graph name.
  nquads = """
  <http://example.org/#S1> <http://example.org/#p1> <http://example.org/#O1> <http://example.org/#G> .
  <http://example.org/#S1> <http://example.org/#p2> <http://example.org/#O2> <http://example.org/#G> .
  <http://example.org/#S2> <http://example.org/#p3> _:foo <http://example.org/#G> .
  <http://example.org/#S2> <http://example.org/#p3> "foo"@en .
  """

  decoded = Decoder.decode_from_stream(string_to_stream(nquads))

  expected =
    Dataset.new([
      {EX.S1, EX.p1(), EX.O1, EX.G},
      {EX.S1, EX.p2(), EX.O2, EX.G},
      {EX.S2, EX.p3(), ~B"foo", EX.G},
      {EX.S2, EX.p3(), ~L"foo"en}
    ])

  assert decoded == expected
end
end