From 3480c254c31dff2996c7713e839d362cbab6e0f9 Mon Sep 17 00:00:00 2001 From: Marcel Otto Date: Sun, 9 May 2021 11:58:36 +0200 Subject: [PATCH] Add missing escaping of language-tagged literals in Turtle encoder --- CHANGELOG.md | 2 ++ lib/rdf/serializations/turtle_encoder.ex | 2 +- test/unit/turtle_encoder_test.exs | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b51114..f48e435 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ This project adheres to [Semantic Versioning](http://semver.org/) and - the Turtle encoder was encoding IRIs as prefixed names even when they were resulting in non-conform prefixed names +- the Turtle encoder didn't properly escape special characters in language-tagged + literals - the `Inspect` protocol implementation for `RDF.Diff` was causing an error when both graphs had prefixes defined diff --git a/lib/rdf/serializations/turtle_encoder.ex b/lib/rdf/serializations/turtle_encoder.ex index 1d4ba2f..889c9f7 100644 --- a/lib/rdf/serializations/turtle_encoder.ex +++ b/lib/rdf/serializations/turtle_encoder.ex @@ -351,7 +351,7 @@ defmodule RDF.Turtle.Encoder do do: to_string(bnode) defp term(%Literal{literal: %LangString{} = lang_string}, _, _, _) do - ~s["#{lang_string.value}"@#{lang_string.language}] + quoted(lang_string.value) <> "@" <> lang_string.language end defp term(%Literal{literal: %XSD.String{}} = literal, _, _, _) do diff --git a/test/unit/turtle_encoder_test.exs b/test/unit/turtle_encoder_test.exs index e62f30d..2403ae4 100644 --- a/test/unit/turtle_encoder_test.exs +++ b/test/unit/turtle_encoder_test.exs @@ -738,6 +738,21 @@ defmodule RDF.Turtle.EncoderTest do ) end + test "language-tagged literals with newlines embedded are encoded with long quotes" do + Turtle.read_string!(~s[ """testing string parsing in Turtle. + """@en .]) + |> assert_serialization(matches: [~s["""testing string parsing in Turtle.\n]]) + end + + test "language-tagged literals escaping" do + Turtle.read_string!(~s[ """string with " escaped quote marks"""@en .]) + |> assert_serialization( + matches: [ + ~r[string with \\" escaped quote mark] + ] + ) + end + test "language tagged literals specifies language for literal with language" do Turtle.read_string!(~s[ "string"@en .]) |> assert_serialization(matches: [~r["string"@en]])