# Tests for `RDF.Turtle.Encoder`.
#
# The suite has three parts:
#
#   * exact-output tests which compare `Turtle.Encoder.encode!/2` results
#     against complete expected Turtle documents,
#   * data-driven tests generated from a map of Turtle inputs, which are parsed
#     with `Turtle.read_string!/1` and checked via `assert_serialization/2`,
#   * tests on the serialization of lists and literals.
#
# NOTE(review): the IRI literals written in angle brackets (inside `~I`/`~B`
# sigils, Turtle inputs, regexes and expected documents) and the line layout of
# the expected Turtle documents were restored from the surrounding evidence in
# this file (the `defvocab` base IRI, the `prefixes:`/`base_iri:` options and
# the XSD namespace). Please confirm them against the actual encoder output,
# in particular the blank lines in the `only:` expectations.
defmodule RDF.Turtle.EncoderTest do
  use ExUnit.Case, async: false

  alias RDF.Turtle

  doctest Turtle.Encoder

  alias RDF.Graph
  alias RDF.NS
  alias RDF.NS.{RDFS, OWL}

  import RDF.Sigils

  use RDF.Vocabulary.Namespace

  defvocab EX, base_iri: "http://example.org/#", terms: [], strict: false

  describe "serializing a graph" do
    test "an empty graph is serialized to an empty string" do
      assert Turtle.Encoder.encode!(Graph.new(), prefixes: %{}) == ""
    end

    test "statements with IRIs only" do
      assert Turtle.Encoder.encode!(
               Graph.new([
                 {EX.S1, EX.p1(), EX.O1},
                 {EX.S1, EX.p1(), EX.O2},
                 {EX.S1, EX.p2(), EX.O3},
                 {EX.S2, EX.p3(), EX.O4}
               ]),
               prefixes: %{}
             ) ==
               """
               <http://example.org/#S1>
                   <http://example.org/#p1> <http://example.org/#O1>, <http://example.org/#O2> ;
                   <http://example.org/#p2> <http://example.org/#O3> .

               <http://example.org/#S2>
                   <http://example.org/#p3> <http://example.org/#O4> .
               """
    end

    test "statements with prefixed names" do
      assert Turtle.Encoder.encode!(
               Graph.new([
                 {EX.S1, EX.p1(), EX.O1},
                 {EX.S1, EX.p1(), EX.O2},
                 {EX.S1, EX.p2(), EX.O3},
                 {EX.S2, EX.p3(), EX.O4}
               ]),
               prefixes: %{
                 ex: EX.__base_iri__(),
                 xsd: NS.XSD.__base_iri__()
               }
             ) ==
               """
               @prefix ex: <#{to_string(EX.__base_iri__())}> .
               @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

               ex:S1
                   ex:p1 ex:O1, ex:O2 ;
                   ex:p2 ex:O3 .

               ex:S2
                   ex:p3 ex:O4 .
               """
    end

    test "when no prefixes are given, the prefixes from the given graph are used" do
      assert Turtle.Encoder.encode!(
               Graph.new(
                 [
                   {EX.S1, EX.p1(), EX.O1},
                   {EX.S1, EX.p1(), EX.O2},
                   {EX.S1, EX.p2(), NS.XSD.integer()},
                   {EX.S2, EX.p3(), EX.O4}
                 ],
                 prefixes: %{
                   "": EX.__base_iri__(),
                   xsd: NS.XSD.__base_iri__()
                 }
               )
             ) ==
               """
               @prefix : <#{to_string(EX.__base_iri__())}> .
               @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

               :S1
                   :p1 :O1, :O2 ;
                   :p2 xsd:integer .

               :S2
                   :p3 :O4 .
               """
    end

    test "when no base IRI is given, the base IRI from the given graph is used" do
      assert Turtle.Encoder.encode!(
               Graph.new([{EX.S1, EX.p1(), EX.O1}],
                 prefixes: %{},
                 base_iri: EX.__base_iri__()
               )
             ) ==
               """
               @base <#{to_string(EX.__base_iri__())}> .

               <S1>
                   <p1> <O1> .
               """
    end

    test "when a base IRI is given, it is used instead of the base IRI of the given graph" do
      assert Turtle.Encoder.encode!(
               Graph.new([{EX.S1, EX.p1(), EX.O1}],
                 prefixes: %{},
                 base_iri: EX.other()
               ),
               base_iri: EX.__base_iri__()
             ) ==
               """
               @base <#{to_string(EX.__base_iri__())}> .

               <S1>
                   <p1> <O1> .
               """
    end

    test "when no prefixes are given and no prefixes are in the given graph the default_prefixes are used" do
      assert Turtle.Encoder.encode!(Graph.new({EX.S, EX.p(), NS.XSD.string()})) ==
               """
               @prefix rdf: <#{to_string(RDF.__base_iri__())}> .
               @prefix rdfs: <#{to_string(RDFS.__base_iri__())}> .
               @prefix xsd: <#{to_string(NS.XSD.__base_iri__())}> .

               <http://example.org/#S>
                   <http://example.org/#p> xsd:string .
               """
    end

    test "statements with empty prefixed names" do
      assert Turtle.Encoder.encode!(Graph.new({EX.S, EX.p(), EX.O}),
               prefixes: %{"" => EX.__base_iri__()}
             ) ==
               """
               @prefix : <#{to_string(EX.__base_iri__())}> .

               :S
                   :p :O .
               """
    end

    test "statements with literals" do
      assert Turtle.Encoder.encode!(
               Graph.new([
                 {EX.S1, EX.p1(), ~L"foo"},
                 {EX.S1, EX.p1(), ~L"foo"en},
                 {EX.S2, EX.p2(), RDF.literal("strange things", datatype: EX.custom())}
               ]),
               prefixes: %{}
             ) ==
               """
               <http://example.org/#S1>
                   <http://example.org/#p1> "foo"@en, "foo" .

               <http://example.org/#S2>
                   <http://example.org/#p2> "strange things"^^<#{EX.custom()}> .
               """
    end

    test "statements with blank nodes" do
      assert Turtle.Encoder.encode!(
               Graph.new([
                 {EX.S1, EX.p1(), [RDF.bnode(1), RDF.bnode("foo"), RDF.bnode(:bar)]},
                 {EX.S2, EX.p1(), [RDF.bnode(1), RDF.bnode("foo"), RDF.bnode(:bar)]}
               ]),
               prefixes: %{}
             ) ==
               """
               <http://example.org/#S1>
                   <http://example.org/#p1> _:1, _:bar, _:foo .

               <http://example.org/#S2>
                   <http://example.org/#p1> _:1, _:bar, _:foo .
               """
    end

    test "ordering of descriptions" do
      assert Turtle.Encoder.encode!(
               Graph.new([
                 {EX.__base_iri__(), RDF.type(), OWL.Ontology},
                 {EX.S1, RDF.type(), EX.O},
                 {EX.S2, RDF.type(), RDFS.Class},
                 {EX.S3, RDF.type(), RDF.Property}
               ]),
               base_iri: EX.__base_iri__(),
               prefixes: %{
                 rdf: RDF.__base_iri__(),
                 rdfs: RDFS.__base_iri__(),
                 owl: OWL.__base_iri__()
               }
             ) ==
               """
               @base <#{to_string(EX.__base_iri__())}> .

               @prefix rdf: <#{to_string(RDF.__base_iri__())}> .
               @prefix rdfs: <#{to_string(RDFS.__base_iri__())}> .
               @prefix owl: <#{to_string(OWL.__base_iri__())}> .

               <>
                   a owl:Ontology .

               <S2>
                   a rdfs:Class .

               <S1>
                   a <O> .

               <S3>
                   a rdf:Property .
               """
    end

    test "directive_style option" do
      assert Turtle.Encoder.encode!(Graph.new({EX.S, RDFS.subClassOf(), EX.O}),
               prefixes: %{rdfs: RDFS.__base_iri__()},
               base_iri: EX.__base_iri__(),
               directive_style: :turtle
             ) ==
               """
               @base <#{to_string(EX.__base_iri__())}> .

               @prefix rdfs: <#{to_string(RDFS.__base_iri__())}> .

               <S>
                   rdfs:subClassOf <O> .
               """

      assert Turtle.Encoder.encode!(Graph.new({EX.S, RDFS.subClassOf(), EX.O}),
               prefixes: %{rdfs: RDFS.__base_iri__()},
               base_iri: EX.__base_iri__(),
               directive_style: :sparql
             ) ==
               """
               BASE <#{to_string(EX.__base_iri__())}>

               PREFIX rdfs: <#{to_string(RDFS.__base_iri__())}>

               <S>
                   rdfs:subClassOf <O> .
               """
    end

    test "partial document" do
      graph =
        Graph.new({EX.S, RDFS.subClassOf(), EX.O},
          prefixes: %{rdfs: RDFS.__base_iri__()},
          base_iri: EX.__base_iri__()
        )

      assert Turtle.Encoder.encode!(graph, only: :triples) ==
               """
               <S>
                   rdfs:subClassOf <O> .
               """

      # NOTE(review): the trailing blank line of the directive-only documents
      # below is assumed; confirm against the encoder.
      assert Turtle.Encoder.encode!(graph, only: :prefixes) ==
               """
               @prefix rdfs: <#{to_string(RDFS.__base_iri__())}> .

               """

      assert Turtle.Encoder.encode!(graph, only: :base) ==
               """
               @base <#{to_string(EX.__base_iri__())}> .

               """

      assert Turtle.Encoder.encode!(graph, only: :directives, directive_style: :sparql) ==
               """
               BASE <#{to_string(EX.__base_iri__())}>

               PREFIX rdfs: <#{to_string(RDFS.__base_iri__())}>

               """

      assert_raise RuntimeError, "unknown Turtle document element: :undefined", fn ->
        Turtle.Encoder.encode!(graph, only: :undefined)
      end
    end
  end

  describe "prefixed_name/2" do
    # Provides a namespace-to-prefix mapping with one hash namespace ("ex") and
    # one slash namespace ("ex2").
    setup do
      {:ok,
       prefixes: %{
         RDF.iri(EX.__base_iri__()) => "ex",
         ~I<http://example.org/> => "ex2"
       }}
    end

    test "hash iri with existing prefix", %{prefixes: prefixes} do
      assert Turtle.Encoder.prefixed_name(EX.foo(), prefixes) == "ex:foo"
    end

    test "hash iri namespace without name", %{prefixes: prefixes} do
      assert Turtle.Encoder.prefixed_name(RDF.iri(EX.__base_iri__()), prefixes) == "ex:"
    end

    test "hash iri with non-existing prefix" do
      refute Turtle.Encoder.prefixed_name(EX.foo(), %{})
    end

    test "slash iri with existing prefix", %{prefixes: prefixes} do
      assert Turtle.Encoder.prefixed_name(~I<http://example.org/foo>, prefixes) == "ex2:foo"
    end

    test "slash iri namespace without name", %{prefixes: prefixes} do
      assert Turtle.Encoder.prefixed_name(~I<http://example.org/>, prefixes) == "ex2:"
    end

    test "slash iri with non-existing prefix" do
      refute Turtle.Encoder.prefixed_name(~I<http://example.org/foo>, %{})
    end
  end

  # Data-driven tests: every entry becomes one test which parses `:input` and
  # passes the whole entry (as a keyword list) to `assert_serialization/2`,
  # which reads the `:matches`, `:neg_matches`, `:prefixes` and `:base_iri`
  # keys from it.
  %{
    "full IRIs without base" => %{
      input: "<http://a/b> <http://a/c> <http://a/d> .",
      matches: [~r(<http://a/b>\s+<http://a/c>\s+<http://a/d>\s+\.)]
    },
    "relative IRIs with base" => %{
      input: "<http://a/b> <http://a/c> <http://a/d> .",
      matches: [~r(@base\s+<http://a/>\s+\.), ~r(<b>\s+<c>\s+<d>\s+\.)m],
      base_iri: "http://a/"
    },
    "pname IRIs with prefix" => %{
      input: "<http://example.com/b> <http://example.com/c> <http://example.com/d> .",
      matches: [
        ~r(@prefix\s+ex:\s+<http://example.com/>\s+\.),
        ~r(ex:b\s+ex:c\s+ex:d\s+\.)
      ],
      prefixes: %{ex: "http://example.com/"}
    },
    "pname IRIs with empty prefix" => %{
      input: "<http://example.com/b> <http://example.com/c> <http://example.com/d> .",
      matches: [
        ~r(@prefix\s+:\s+<http://example.com/>\s+\.),
        ~r(:b\s+:c\s+:d\s+\.)
      ],
      prefixes: %{"" => "http://example.com/"}
    },
    "object list" => %{
      input: "@prefix ex: <http://example.com/> . ex:b ex:c ex:d, ex:e .",
      matches: [
        ~r(@prefix\s+ex:\s+<http://example.com/>\s+\.),
        ~r(ex:b\s+ex:c\s+ex:[de],\s++ex:[de]\s+\.)m
      ],
      prefixes: %{"ex" => "http://example.com/"}
    },
    "property list" => %{
      input: "@prefix ex: <http://example.com/> . ex:b ex:c ex:d; ex:e ex:f .",
      matches: [
        ~r(@prefix\s+ex:\s+<http://example.com/>\s+\.),
        ~r(ex:b\s+ex:c\s+ex:d\s+;),
        ~r(\s++ex:e\s+ex:f\s+\.)
      ],
      prefixes: %{"ex" => "http://example.com/"}
    },
    "reuses BNode labels by default" => %{
      input: "@prefix ex: <http://example.com/> . _:a ex:b _:a .",
      matches: [~r(\s*_:a\s+ex:b\s+_:a\s+\.)],
      prefixes: %{"ex" => "http://example.com/"}
    },
    "bare anon" => %{
      input: "@prefix ex: <http://example.com/> . [ex:a ex:b] .",
      matches: [~r(^\[\s*ex:a\s+ex:b\s\]\s+\.)m],
      prefixes: %{"ex" => "http://example.com/"}
    },
    "anon as subject" => %{
      input: "@prefix ex: <http://example.com/> . [ex:a ex:b] ex:c ex:d .",
      matches: [
        ~r(\[\s*ex:a\s+ex:b\s*;)m,
        ~r(\sex:c\s+ex:d\s*\]\s+\.)m
      ],
      prefixes: %{"ex" => "http://example.com/"}
    },
    "anon as object" => %{
      input: "@prefix ex: <http://example.com/> . ex:a ex:b [ex:c ex:d] .",
      matches: [~r(ex:a\s+ex:b\s+\[\s*ex:c\s+ex:d\s*\]\s+\.)],
      neg_matches: [~r(_:\w+\s+\s*ex:c\s+ex:d\s+\.)],
      prefixes: %{"ex" => "http://example.com/"}
    },

    #    "generated BNodes with :unique_bnodes" => %{
    #      input: "@prefix ex: <http://example.com/> . _:a ex:b _:a .",
    #      matches: [~r(^\s+*_:g\w+\s+ex:b\s+_:g\w+\s+\.$)],
    #      unique_bnodes: true
    #    },
    #    "standard prefixes" => %{
    #      input: """
    #        <a> a <http://xmlns.com/foaf/0.1/Person>;
    #          <http://purl.org/dc/elements/1.1/title> "Person" .
    #      """,
    #      matches: [
    #        ~r(^@prefix foaf: <http://xmlns.com/foaf/0.1/> \.$),
    #        ~r(^@prefix dc: <http://purl.org/dc/elements/1.1/> \.$),
    #        ~r(^<a> a foaf:Person;$),
    #        ~r(dc:title "Person" \.$),
    #      ],
    #      standard_prefixes: true, prefixes: %{}
    #    }
    "order properties" => %{
      input: """
      @prefix ex: <http://example.com/> .
      @prefix dc: <http://purl.org/dc/elements/1.1/> .
      @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
      ex:b ex:c ex:d .
      ex:b dc:title "title" .
      ex:b a ex:class .
      ex:b rdfs:label "label" .
      """,
      matches: [
        ~r(ex:b\s+a\s+ex:class\s*;)m,
        ~r(ex:class\s*;\s+rdfs:label\s+"label")m,
        ~r("label"\s*;\s++ex:c\s+ex:d)m,
        ~r(ex:d\s*;\s+dc:title\s+"title"\s+\.)m
      ],
      prefixes: %{
        "ex" => "http://example.com/",
        "dc" => "http://purl.org/dc/elements/1.1/",
        "rdfs" => "http://www.w3.org/2000/01/rdf-schema#"
      }
    }
  }
  |> Enum.each(fn {name, data} ->
    @tag data: data
    test name, %{data: data} do
      assert_serialization(Turtle.read_string!(data.input), Keyword.new(data))
    end
  end)

  describe "lists" do
    test "should generate literal list" do
      Turtle.read_string!(
        ~s[@prefix ex: <http://example.com/> . ex:a ex:b ( "apple" "banana" ) .]
      )
      |> assert_serialization(
        prefixes: %{ex: ~I<http://example.com/>},
        matches: [
          {~r[ex:a\s+ex:b\s+\("apple" "banana"\)\s+\.],
           "doesn't include the list as a Turtle list"}
        ]
      )
    end

    test "should generate empty list" do
      Turtle.read_string!(~s[@prefix ex: <http://example.com/> . ex:a ex:b () .])
      |> assert_serialization(
        prefixes: %{ex: ~I<http://example.com/>},
        matches: [
          {~r[ex:a\s+ex:b\s+\(\)\s+\.], "doesn't include the list as a Turtle list"}
        ]
      )
    end

    test "should generate empty list as subject" do
      Turtle.read_string!(~s[@prefix ex: <http://example.com/> . () ex:a ex:b .])
      |> assert_serialization(
        prefixes: %{ex: ~I<http://example.com/>},
        matches: [
          {~r[\(\)\s+ex:a\s+ex:b\s+\.], "doesn't include the list as a Turtle list"}
        ]
      )
    end

    test "should generate list as subject" do
      Turtle.read_string!(~s[@prefix ex: <http://example.com/> . (ex:a) ex:b ex:c .])
      |> assert_serialization(
        prefixes: %{ex: ~I<http://example.com/>},
        matches: [
          {~r[\(ex:a\)\s+ex:b\s+ex:c\s+\.], "doesn't include the list as a Turtle list"}
        ]
      )
    end

    test "should generate list of empties" do
      graph =
        Turtle.read_string!(
          ~s{@prefix ex: <http://example.com/> . [ex:listOf2Empties (() ())] .}
        )

      serialization =
        assert_serialization(graph,
          prefixes: %{ex: ~I<http://example.com/>},
          matches: [
            {~r[\[\s*ex:listOf2Empties \(\(\) \(\)\)\s\]\s+\.],
             "doesn't include the list as a Turtle list"}
          ]
        )

      # The list must be written in the compact `( ... )` form, i.e. without
      # explicit rdf:first/rdf:rest statements.
      refute String.contains?(serialization, to_string(RDF.first())),
             ~s[output\n\n#{serialization}\n\ncontains #{to_string(RDF.first())}]

      refute String.contains?(serialization, to_string(RDF.rest())),
             ~s[output\n\n#{serialization}\n\ncontains #{to_string(RDF.rest())}]
    end

    test "should generate list anon" do
      Turtle.read_string!(
        ~s{@prefix ex: <http://example.com/> . [ex:twoAnons ([a ex:mother] [a ex:father])] .}
      )
      |> assert_serialization(
        prefixes: %{ex: ~I<http://example.com/>},
        matches: [
          {~r[\[\s*ex:twoAnons \(\[\s*a ex:mother\s*\]\s+\[\s*a ex:father\s*\]\s*\)\s*\]\s+\.],
           "doesn't include the list as a Turtle list"}
        ]
      )
    end

    # TODO: Why should this test from RDF.rb work? Why should the `a owl:Class` statements about the list nodes be ignored?
    #    test "should generate owl:unionOf list" do
    #      Turtle.read_string!("""
    #        @prefix ex: <http://example.com/> .
    #        @prefix owl: <http://www.w3.org/2002/07/owl#> .
    #        @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
    #        @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
    #        ex:a rdfs:domain [
    #          a owl:Class;
    #          owl:unionOf [
    #            a owl:Class;
    #            rdf:first ex:b;
    #            rdf:rest [
    #              a owl:Class;
    #              rdf:first ex:c;
    #              rdf:rest rdf:nil
    #            ]
    #          ]
    #        ] .
    #        """)
    #      |> assert_serialization(
    #          prefixes: %{
    #            ex:   ~I<http://example.com/>,
    #            rdf:  RDF.NS.RDF.__base_iri__,
    #            rdfs: RDFS.__base_iri__,
    #            owl:  OWL.__base_iri__,
    #          },
    #          matches: [
    #            {~r[ex:a\s+rdfs:domain \[\s+a owl:Class;\s+owl:unionOf\s+\(ex:b\s+ex:c\)\s*\]\s*\.],
    #              "doesn't include the list as a Turtle list"}
    #          ]
    #        )
    #
    #    end

    test "when one of the list nodes is referenced in other statements the whole list is not represented as a Turtle list structure" do
      Graph.new(
        ~B<Foo>
        |> RDF.first(EX.Foo)
        |> RDF.rest(~B<Bar>)
      )
      |> Graph.add(
        ~B<Bar>
        |> RDF.first(EX.Bar)
        |> RDF.rest(RDF.nil())
      )
      |> Graph.add({EX.Baz, EX.quux(), ~B<Bar>})
      |> assert_serialization(
        prefixes: %{ex: EX.__base_iri__()},
        # TODO: provide a positive match
        neg_matches: [
          {~r[\(\s*ex:Foo\s+ex:Bar\s*\)], "does include the list as a Turtle list"}
        ]
      )
    end

    test "when given an invalid list" do
      Graph.new(
        ~B<Foo>
        |> RDF.first(1)
        |> RDF.rest(EX.Foo)
      )
      |> assert_serialization(
        prefixes: %{ex: ~I<http://example.com/>},
        # TODO: provide a positive match
        neg_matches: [
          {~r[\[\s*_:Foo \(\(\) \(\)\)\]\s+\.],
           "does include the invalid list as a Turtle list"}
        ]
      )
    end
  end

  describe "literals" do
    test "plain literals with newlines embedded are encoded with long quotes" do
      Turtle.read_string!(
        ~s[<http://a> <http://b> """testing string parsing in Turtle.\n""" .]
      )
      |> assert_serialization(matches: [~s["""testing string parsing in Turtle.\n]])
    end

    test "plain literals escaping" do
      Turtle.read_string!(
        ~s[<http://a> <http://b> """string with " escaped quote marks""" .]
      )
      |> assert_serialization(
        matches: [
          ~r[string with \\" escaped quote mark]
        ]
      )
    end

    test "language tagged literals specifies language for literal with language" do
      Turtle.read_string!(~s[<http://a> <http://b> "string"@en .])
      |> assert_serialization(matches: [~r["string"@en]])
    end

    test "typed literals" do
      Turtle.read_string!(
        ~s[@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . <http://a> <http://b> "http://foo/"^^xsd:anyURI .]
      )
      |> assert_serialization(
        matches: [
          ~r["http://foo/"\^\^<http://www.w3.org/2001/XMLSchema#anyURI> \.]
        ]
      )
    end

    test "typed literals use declared prefixes" do
      Turtle.read_string!(
        ~s[@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . <http://a> <http://b> "http://foo/"^^xsd:anyURI .]
      )
      |> assert_serialization(
        matches: [
          ~r[@prefix xsd: <http://www.w3.org/2001/XMLSchema#> \.],
          ~r["http://foo/"\^\^xsd:anyURI \.]
        ],
        prefixes: %{xsd: NS.XSD.__base_iri__()}
      )
    end

    test "valid booleans" do
      [
        {true, "true ."},
        {"true", "true ."},
        {"1", "true ."},
        {false, "false ."},
        {"false", "false ."},
        {"0", "false ."}
      ]
      |> Enum.each(fn {value, output} ->
        Graph.new({EX.S, EX.p(), RDF.XSD.boolean(value)})
        |> assert_serialization(matches: [output])
      end)
    end

    test "invalid booleans" do
      [
        {"string", ~s{"string"^^<http://www.w3.org/2001/XMLSchema#boolean>}},
        {"42", ~s{"42"^^<http://www.w3.org/2001/XMLSchema#boolean>}},
        {"TrUe", ~s{"TrUe"^^<http://www.w3.org/2001/XMLSchema#boolean>}},
        {"FaLsE", ~s{"FaLsE"^^<http://www.w3.org/2001/XMLSchema#boolean>}}
      ]
      |> Enum.each(fn {value, output} ->
        Graph.new({EX.S, EX.p(), RDF.XSD.boolean(value)})
        |> assert_serialization(matches: [output])
      end)
    end

    test "valid integers" do
      [
        {0, "0 ."},
        {"0", "0 ."},
        {1, "1 ."},
        {"1", "1 ."},
        {-1, "-1 ."},
        {"-1", "-1 ."},
        {10, "10 ."},
        {"10", "10 ."},
        {"0010", "10 ."}
      ]
      |> Enum.each(fn {value, output} ->
        Graph.new({EX.S, EX.p(), RDF.XSD.integer(value)})
        |> assert_serialization(matches: [output])
      end)
    end

    test "invalid integers" do
      [
        {"string", ~s{"string"^^<http://www.w3.org/2001/XMLSchema#integer>}},
        {"true", ~s{"true"^^<http://www.w3.org/2001/XMLSchema#integer>}}
      ]
      |> Enum.each(fn {value, output} ->
        Graph.new({EX.S, EX.p(), RDF.XSD.integer(value)})
        |> assert_serialization(matches: [output])
      end)
    end

    test "valid decimals" do
      [
        {1.0, "1.0 ."},
        {"1.0", "1.0 ."},
        {0.1, "0.1 ."},
        {"0.1", "0.1 ."},
        {-1, "-1.0 ."},
        {"-1", "-1.0 ."},
        {10.02, "10.02 ."},
        {"10.02", "10.02 ."},
        {"010.020", "10.02 ."}
      ]
      |> Enum.each(fn {value, output} ->
        Graph.new({EX.S, EX.p(), RDF.XSD.decimal(value)})
        |> assert_serialization(matches: [output])
      end)
    end

    test "invalid decimals" do
      [
        {"string", ~s{"string"^^<http://www.w3.org/2001/XMLSchema#decimal>}},
        {"true", ~s{"true"^^<http://www.w3.org/2001/XMLSchema#decimal>}}
      ]
      |> Enum.each(fn {value, output} ->
        Graph.new({EX.S, EX.p(), RDF.XSD.decimal(value)})
        |> assert_serialization(matches: [output])
      end)
    end

    test "valid doubles" do
      [
        {1.0e1, "1.0E1 ."},
        {"1.0e1", "1.0E1 ."},
        {0.1e1, "1.0E0 ."},
        {"0.1e1", "1.0E0 ."},
        {10.02e1, "1.002E2 ."},
        {"10.02e1", "1.002E2 ."},
        {"010.020", "1.002E1 ."},
        {14, "1.4E1 ."},
        {-1, "-1.0E0 ."},
        {"-1", "-1.0E0 ."}
      ]
      |> Enum.each(fn {value, output} ->
        Graph.new({EX.S, EX.p(), RDF.XSD.double(value)})
        |> assert_serialization(matches: [output])
      end)
    end

    test "invalid doubles" do
      [
        {"string", ~s{"string"^^<http://www.w3.org/2001/XMLSchema#double>}},
        {"true", ~s{"true"^^<http://www.w3.org/2001/XMLSchema#double>}}
      ]
      |> Enum.each(fn {value, output} ->
        Graph.new({EX.S, EX.p(), RDF.XSD.double(value)})
        |> assert_serialization(matches: [output])
      end)
    end
  end

  describe "W3C test suite roundtrip" do
    @tag skip: "TODO: We need a Graph isomorphism comparison to implement this."
    test "..."
  end

  # Serializes `graph` with `Turtle.write_string/2` and checks the result
  # against the given expectations.
  #
  # Options read from `opts` (all other keys are ignored):
  #
  #   * `:prefixes` - passed on to the encoder (default: `%{}`)
  #   * `:base_iri` - passed on to the encoder (default: `nil`)
  #   * `:matches` - expectations the output must satisfy (default: `[]`)
  #   * `:neg_matches` - expectations the output must not satisfy (default: `[]`)
  #
  # An expectation is a `Regex` (matched with `Regex.match?/2`) or any other
  # term (checked with `String.contains?/2`), optionally wrapped in a
  # `{expectation, message}` tuple to customize the failure message.
  #
  # Returns the serialized string, so callers can run further assertions on it.
  defp assert_serialization(graph, opts) do
    prefixes = Keyword.get(opts, :prefixes, %{})
    base_iri = Keyword.get(opts, :base_iri)

    assert {:ok, serialized} =
             Turtle.write_string(graph, prefixes: prefixes, base_iri: base_iri)

    opts
    |> Keyword.get(:matches, [])
    |> Enum.each(fn expectation ->
      {pattern, message} = with_failure_message(expectation, serialized, "doesn't include")
      assert serialized_matches?(serialized, pattern), message
    end)

    opts
    |> Keyword.get(:neg_matches, [])
    |> Enum.each(fn expectation ->
      {pattern, message} = with_failure_message(expectation, serialized, "does include")
      refute serialized_matches?(serialized, pattern), message
    end)

    serialized
  end

  # Normalizes an expectation to a `{pattern, failure_message}` tuple; the
  # failure message always starts with the serialized output.
  defp with_failure_message({pattern, message}, serialized, _default_verb) do
    {pattern, ~s[output\n\n#{serialized}\n\n#{message}]}
  end

  defp with_failure_message(pattern, serialized, default_verb) do
    {pattern, ~s[output\n\n#{serialized}\n\n#{default_verb} #{inspect(pattern)}]}
  end

  # Regexes are matched against the output, everything else is searched for
  # literally.
  defp serialized_matches?(serialized, %Regex{} = pattern),
    do: Regex.match?(pattern, serialized)

  defp serialized_matches?(serialized, contents),
    do: String.contains?(serialized, contents)
end