From b7291c7d93c5d989c4d5427485c5e5dc6dfe434b Mon Sep 17 00:00:00 2001 From: Marcel Otto Date: Sun, 4 Nov 2018 15:54:49 +0100 Subject: [PATCH] Proper handling of comparisons between date literals --- CHANGELOG.md | 3 +- lib/rdf/datatype.ex | 4 +- lib/rdf/datatypes/date.ex | 59 ++++++++++++--- lib/rdf/datatypes/date_time.ex | 68 +++++++++++++++++ lib/rdf/literal.ex | 2 + test/unit/equality_test.exs | 46 +++++++++--- test/unit/literal_comparison_test.exs | 101 ++++++++++++++++++-------- 7 files changed, 228 insertions(+), 55 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 859f632..7ad0b36 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,7 +22,8 @@ This project adheres to [Semantic Versioning](http://semver.org/) and ### Fixed -- `RDF.Date.equal_value?/2` did not handle timezones correctly +- `RDF.DateTime.equal_value?/2` and `RDF.Date.equal_value?/2` did not handle + timezones correctly - `-00:00` is a valid timezone offset on `RDF.DateTime` diff --git a/lib/rdf/datatype.ex b/lib/rdf/datatype.ex index ea7c05e..0984748 100644 --- a/lib/rdf/datatype.ex +++ b/lib/rdf/datatype.ex @@ -83,6 +83,8 @@ defmodule RDF.Datatype do Returns `:gt` if first literal is greater than the second in terms of their datatype and `:lt` for vice versa. If the two literals are equal `:eq` is returned. + For datatypes with only partial ordering `:indeterminate` is returned when the + order of the given literals is not defined. Returns `nil` when the given arguments are not comparable datatypes or if one them is invalid. @@ -90,7 +92,7 @@ defmodule RDF.Datatype do The default implementation of the `_using__` macro compares the values of the `canonical/1` forms of the given literals of this datatype. 
""" - @callback compare(literal1 :: RDF.Literal.t, literal2 :: RDF.Literal.t) :: :lt | :gt | :eq | nil + @callback compare(literal1 :: RDF.Literal.t, literal2 :: RDF.Literal.t) :: :lt | :gt | :eq | :indeterminate | nil @lang_string RDF.iri("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString") diff --git a/lib/rdf/datatypes/date.ex b/lib/rdf/datatypes/date.ex index 6b5518b..92fd4f2 100644 --- a/lib/rdf/datatypes/date.ex +++ b/lib/rdf/datatypes/date.ex @@ -99,21 +99,56 @@ defmodule RDF.Date do @impl RDF.Datatype - def equal_value?(%Literal{datatype: @id, value: value1} = left, - %Literal{datatype: @id, value: value2} = right) - when is_nil(value1) or is_nil(value2), - do: left.uncanonical_lexical == right.uncanonical_lexical + def equal_value?(literal1, literal2) - def equal_value?(%Literal{datatype: @id} = left, %Literal{datatype: @id} = right), - do: equal_normalization(left).value == equal_normalization(right).value + def equal_value?(%Literal{datatype: @id, value: nil, uncanonical_lexical: lexical1}, + %Literal{datatype: @id, value: nil, uncanonical_lexical: lexical2}) do + lexical1 == lexical2 + end + + def equal_value?(%Literal{datatype: @id, value: value1}, + %Literal{datatype: @id, value: value2}) + when is_nil(value1) or is_nil(value2), do: false + + def equal_value?(%Literal{datatype: @id, value: value1}, + %Literal{datatype: @id, value: value2}) do + RDF.DateTime.equal_value?( + comparison_normalization(value1), + comparison_normalization(value2) + ) + end def equal_value?(_, _), do: nil - defp equal_normalization(%{value: {value, "-00:00"}}), - do: new(value, %{tz: "Z"}) - defp equal_normalization(%{value: value}) when not is_tuple(value), - do: new(value, %{tz: "Z"}) - defp equal_normalization(literal), - do: literal + + @impl RDF.Datatype + def compare(left, right) + + def compare(%Literal{datatype: @id, value: value1}, + %Literal{datatype: @id, value: value2}) + when is_nil(value1) or is_nil(value2), do: nil + + def compare(%Literal{datatype: @id, 
value: value1}, + %Literal{datatype: @id, value: value2}) do + RDF.DateTime.compare( + comparison_normalization(value1), + comparison_normalization(value2) + ) + end + + def compare(_, _), do: nil + + + defp comparison_normalization({date, tz}) do + (Date.to_iso8601(date) <> "T00:00:00" <> tz) + |> RDF.DateTime.new() + end + + defp comparison_normalization(%Date{} = date) do + (Date.to_iso8601(date) <> "T00:00:00") + |> RDF.DateTime.new() + end + + defp comparison_normalization(_), do: nil end diff --git a/lib/rdf/datatypes/date_time.ex b/lib/rdf/datatypes/date_time.ex index b6e1946..9a012aa 100644 --- a/lib/rdf/datatypes/date_time.ex +++ b/lib/rdf/datatypes/date_time.ex @@ -152,4 +152,72 @@ defmodule RDF.DateTime do end end + + @impl RDF.Datatype + def equal_value?(literal1, literal2) + + def equal_value?(%Literal{datatype: @id, value: %type{} = value1}, + %Literal{datatype: @id, value: %type{} = value2}) + do + type.compare(value1, value2) == :eq + end + + def equal_value?(%Literal{datatype: @id, value: nil, uncanonical_lexical: lexical1}, + %Literal{datatype: @id, value: nil, uncanonical_lexical: lexical2}) do + lexical1 == lexical2 + end + + def equal_value?(%Literal{datatype: @id} = literal1, %Literal{datatype: @id} = literal2) do + case compare(literal1, literal2) do + :lt -> false + :gt -> false + :eq -> true # Unreachable in practice: same-type values are matched by the first clause, and mixed DateTime/NaiveDateTime comparisons only yield :lt, :gt or :indeterminate.
+ _ -> nil + end + end + + def equal_value?(%RDF.Literal{} = left, right) when not is_nil(right) do + unless RDF.Term.term?(right) do + equal_value?(left, RDF.Term.coerce(right)) + end + end + + def equal_value?(_, _), do: nil + + + @impl RDF.Datatype + def compare(left, right) + + def compare(%Literal{datatype: @id, value: %type{} = value1}, + %Literal{datatype: @id, value: %type{} = value2}) do + type.compare(value1, value2) + end + + def compare(%Literal{datatype: @id, value: %DateTime{}} = literal1, + %Literal{datatype: @id, value: %NaiveDateTime{} = value2}) do + cond do + compare(literal1, new(to_datetime(value2, "+"))) == :lt -> :lt + compare(literal1, new(to_datetime(value2, "-"))) == :gt -> :gt + true -> :indeterminate + end + end + + def compare(%Literal{datatype: @id, value: %NaiveDateTime{} = value1}, + %Literal{datatype: @id, value: %DateTime{}} = literal2) do + cond do + compare(new(to_datetime(value1, "-")), literal2) == :lt -> :lt + compare(new(to_datetime(value1, "+")), literal2) == :gt -> :gt + true -> :indeterminate + end + end + + def compare(_, _), do: nil + + + defp to_datetime(naive_datetime, offset) do + (NaiveDateTime.to_iso8601(naive_datetime) <> offset <> "14:00") + |> DateTime.from_iso8601() + |> elem(1) + end + end diff --git a/lib/rdf/literal.ex b/lib/rdf/literal.ex index 42e5af8..77faae9 100644 --- a/lib/rdf/literal.ex +++ b/lib/rdf/literal.ex @@ -293,6 +293,8 @@ defmodule RDF.Literal do Returns `:gt` if first literal is greater than the second in terms of their datatype and `:lt` for vice versa. If the two literals are equal `:eq` is returned. + For datatypes with only partial ordering `:indeterminate` is returned when the + order of the given literals is not defined. Returns `nil` when the given arguments are not comparable datatypes. 
diff --git a/test/unit/equality_test.exs b/test/unit/equality_test.exs index fbdf17d..570418b 100644 --- a/test/unit/equality_test.exs +++ b/test/unit/equality_test.exs @@ -231,8 +231,11 @@ defmodule RDF.EqualityTest do {RDF.date_time("2002-04-02T12:00:00-01:00"), RDF.date_time("2002-04-02T17:00:00+04:00")}, {RDF.date_time("2002-04-02T23:00:00-04:00"), RDF.date_time("2002-04-03T02:00:00-01:00")}, {RDF.date_time("1999-12-31T24:00:00"), RDF.date_time("2000-01-01T00:00:00")}, -# TODO: Assume that the dynamic context provides an implicit timezone value of -05:00 -# {RDF.date_time("2002-04-02T12:00:00"), RDF.date_time("2002-04-02T23:00:00+06:00")}, + + {RDF.date_time("2002-04-02T23:00:00Z"), RDF.date_time("2002-04-02T23:00:00+00:00")}, + {RDF.date_time("2002-04-02T23:00:00Z"), RDF.date_time("2002-04-02T23:00:00-00:00")}, + {RDF.date_time("2002-04-02T23:00:00+00:00"), RDF.date_time("2002-04-02T23:00:00-00:00")}, + # invalid literals {RDF.date_time("foo"), RDF.date_time("foo")}, ] @@ -244,12 +247,19 @@ defmodule RDF.EqualityTest do @value_equal_datetimes_by_coercion [ {RDF.date_time("2002-04-02T12:00:00-01:00"), elem(DateTime.from_iso8601("2002-04-02T12:00:00-01:00"), 1)}, {RDF.date_time("2002-04-02T12:00:00"), ~N"2002-04-02T12:00:00"}, + {RDF.date_time("2002-04-02T23:00:00Z"), elem(DateTime.from_iso8601("2002-04-02T23:00:00+00:00"), 1)}, + {RDF.date_time("2002-04-02T23:00:00+00:00"), elem(DateTime.from_iso8601("2002-04-02T23:00:00Z"), 1)}, + {RDF.date_time("2002-04-02T23:00:00-00:00"), elem(DateTime.from_iso8601("2002-04-02T23:00:00Z"), 1)}, + {RDF.date_time("2002-04-02T23:00:00-00:00"), elem(DateTime.from_iso8601("2002-04-02T23:00:00+00:00"), 1)}, ] @value_unequal_datetimes_by_coercion [ {RDF.date_time("2002-04-02T12:00:00-01:00"), elem(DateTime.from_iso8601("2002-04-02T12:00:00+00:00"), 1)}, ] @incomparable_datetimes [ - {RDF.string("2002-04-02T12:00:00-01:00"), RDF.date_time("2002-04-02T12:00:00-01:00")}, + {RDF.date_time("2002-04-02T12:00:00"), 
RDF.date_time("2002-04-02T12:00:00Z")}, + {RDF.string("2002-04-02T12:00:00-01:00"), RDF.date_time("2002-04-02T12:00:00-01:00")}, + # These are incomparable because of indeterminacy due to missing timezone + {RDF.date_time("2002-04-02T12:00:00"), RDF.date_time("2002-04-02T23:00:00+00:00")}, ] test "term equality", do: assert_term_equal @term_equal_datetimes @@ -277,24 +287,36 @@ defmodule RDF.EqualityTest do {RDF.date("2002-04-02-00:00"), RDF.date("2002-04-02+00:00")}, {RDF.date("2002-04-02Z"), RDF.date("2002-04-02+00:00")}, {RDF.date("2002-04-02Z"), RDF.date("2002-04-02-00:00")}, - {RDF.date("2002-04-02Z"), RDF.date("2002-04-02")}, - {RDF.date("2002-04-02+00:00"), RDF.date("2002-04-02")}, - {RDF.date("2002-04-02-00:00"), RDF.date("2002-04-02")}, ] @value_unequal_dates [ + {RDF.date("2002-04-03Z"), RDF.date("2002-04-02")}, + {RDF.date("2002-04-03"), RDF.date("2002-04-02Z")}, + {RDF.date("2002-04-03+00:00"), RDF.date("2002-04-02")}, + {RDF.date("2002-04-03-00:00"), RDF.date("2002-04-02")}, + # invalid literals + {RDF.date("2002.04.02"), RDF.date("2002-04-02")}, ] @value_equal_dates_by_coercion [ - {RDF.date("2002-04-02"), Date.from_iso8601!("2002-04-02")}, - {RDF.date("2002-04-02Z"), Date.from_iso8601!("2002-04-02")}, - {RDF.date("2002-04-02+00:00"), Date.from_iso8601!("2002-04-02")}, - {RDF.date("2002-04-02-00:00"), Date.from_iso8601!("2002-04-02")}, + {RDF.date("2002-04-02"), Date.from_iso8601!("2002-04-02")}, ] @value_unequal_dates_by_coercion [ - {RDF.date("2002-04-02"), Date.from_iso8601!("2002-04-03")}, - {RDF.date("2002-04-02+01:00"), Date.from_iso8601!("2002-04-02")}, + {RDF.date("2002-04-02"), Date.from_iso8601!("2002-04-03")}, + {RDF.date("2002-04-03+01:00"), Date.from_iso8601!("2002-04-02")}, + {RDF.date("2002-04-03Z"), Date.from_iso8601!("2002-04-02")}, + {RDF.date("2002-04-03+00:00"), Date.from_iso8601!("2002-04-02")}, + {RDF.date("2002-04-03-00:00"), Date.from_iso8601!("2002-04-02")}, ] @incomparable_dates [ {RDF.date("2002-04-02"), 
RDF.string("2002-04-02")}, + # These are incomparable because of indeterminacy due to missing timezone + {RDF.date("2002-04-02Z"), RDF.date("2002-04-02")}, + {RDF.date("2002-04-02"), RDF.date("2002-04-02Z")}, + {RDF.date("2002-04-02+00:00"), RDF.date("2002-04-02")}, + {RDF.date("2002-04-02-00:00"), RDF.date("2002-04-02")}, + {RDF.date("2002-04-02+01:00"), Date.from_iso8601!("2002-04-02")}, + {RDF.date("2002-04-02Z"), Date.from_iso8601!("2002-04-02")}, + {RDF.date("2002-04-02+00:00"), Date.from_iso8601!("2002-04-02")}, + {RDF.date("2002-04-02-00:00"), Date.from_iso8601!("2002-04-02")}, ] test "term equality", do: assert_term_equal @term_equal_dates diff --git a/test/unit/literal_comparison_test.exs b/test/unit/literal_comparison_test.exs index 92f7f1a..a328c0c 100644 --- a/test/unit/literal_comparison_test.exs +++ b/test/unit/literal_comparison_test.exs @@ -85,6 +85,11 @@ defmodule RDF.LiteralComparisonTest do test "when unequal" do assert_order {RDF.date_time("2002-04-02T12:00:00"), RDF.date_time("2002-04-02T17:00:00")} assert_order {RDF.date_time("2002-04-02T12:00:00+01:00"), RDF.date_time("2002-04-02T12:00:00+00:00")} + assert_order {RDF.date_time("2000-01-15T12:00:00"), RDF.date_time("2000-01-16T12:00:00Z")} + end + + test "when unequal due to missing time zone" do + assert_order {RDF.date_time("2000-01-15T00:00:00"), RDF.date_time("2000-02-15T00:00:00")} end test "when equal" do @@ -96,24 +101,34 @@ defmodule RDF.LiteralComparisonTest do # TODO: Assume that the dynamic context provides an implicit timezone value of -05:00 # assert_equal {RDF.date_time("2002-04-02T12:00:00"), RDF.date_time("2002-04-02T23:00:00+06:00")} end + + test "when indeterminate" do + assert_indeterminate {RDF.date_time("2000-01-01T12:00:00"), RDF.date_time("1999-12-31T23:00:00Z")} + assert_indeterminate {RDF.date_time("2000-01-16T12:00:00"), RDF.date_time("2000-01-16T12:00:00Z")} + assert_indeterminate {RDF.date_time("2000-01-16T00:00:00"), RDF.date_time("2000-01-16T12:00:00Z")} + end end 
describe "RDF.Date comparisons" do test "when unequal" do assert_order {RDF.date("2002-04-02"), RDF.date("2002-04-03")} assert_order {RDF.date("2002-04-02+01:00"), RDF.date("2002-04-03+00:00")} + assert_order {RDF.date("2002-04-02"), RDF.date("2002-04-03Z")} end test "when equal" do assert_equal {RDF.date("2002-04-02-01:00"), RDF.date("2002-04-02-01:00")} assert_equal {RDF.date("2002-04-02"), RDF.date("2002-04-02")} # TODO: -# assert_equal {RDF.date("2002-04-02-00:00"), RDF.date("2002-04-02+00:00")} -# assert_equal {RDF.date("2002-04-02Z"), RDF.date("2002-04-02+00:00")} -# assert_equal {RDF.date("2002-04-02Z"), RDF.date("2002-04-02-00:00")} -# assert_equal {RDF.date("2002-04-02Z"), RDF.date("2002-04-02")} -# assert_equal {RDF.date("2002-04-02+00:00"), RDF.date("2002-04-02")} -# assert_equal {RDF.date("2002-04-02-00:00"), RDF.date("2002-04-02")} + assert_equal {RDF.date("2002-04-02-00:00"), RDF.date("2002-04-02+00:00")} + assert_equal {RDF.date("2002-04-02Z"), RDF.date("2002-04-02+00:00")} + assert_equal {RDF.date("2002-04-02Z"), RDF.date("2002-04-02-00:00")} + end + + test "when indeterminate" do + assert_indeterminate {RDF.date("2002-04-02Z"), RDF.date("2002-04-02")} + assert_indeterminate {RDF.date("2002-04-02+00:00"), RDF.date("2002-04-02")} + assert_indeterminate {RDF.date("2002-04-02-00:00"), RDF.date("2002-04-02")} end end @@ -127,6 +142,12 @@ defmodule RDF.LiteralComparisonTest do assert_equal {RDF.time("12:00:00+01:00"), RDF.time("12:00:00+01:00")} assert_equal {RDF.time("12:00:00"), RDF.time("12:00:00")} end + + test "when indeterminate" do + assert_indeterminate {RDF.date("2002-04-02Z"), RDF.date("2002-04-02")} + assert_indeterminate {RDF.date("2002-04-02+00:00"), RDF.date("2002-04-02")} + assert_indeterminate {RDF.date("2002-04-02-00:00"), RDF.date("2002-04-02")} + end end describe "comparisons on RDF.Literals with unsupported types" do @@ -141,29 +162,40 @@ defmodule RDF.LiteralComparisonTest do end end - test "incomparable" do - Enum.each [ - 
{RDF.string("http://example.com/"), RDF.iri("http://example.com/")}, - {RDF.string("foo"), RDF.bnode("foo")}, - {RDF.string("true"), RDF.true}, - {RDF.string("42"), RDF.integer(42)}, - {RDF.string("3.14"), RDF.decimal(3.14)}, - {RDF.string("2002-04-02T12:00:00"), RDF.date_time("2002-04-02T12:00:00")}, - {RDF.string("2002-04-02"), RDF.date("2002-04-02")}, - {RDF.string("12:00:00"), RDF.time("12:00:00")}, - {RDF.false, nil}, - {RDF.true, RDF.integer(42)}, - {RDF.true, RDF.decimal(3.14)}, - {RDF.date_time("2002-04-02T12:00:00"), RDF.true}, - {RDF.date_time("2002-04-02T12:00:00"), RDF.integer(42)}, - {RDF.date_time("2002-04-02T12:00:00"), RDF.decimal(3.14)}, - {RDF.date("2002-04-02"), RDF.true}, - {RDF.date("2002-04-02"), RDF.integer(42)}, - {RDF.date("2002-04-02"), RDF.decimal(3.14)}, - {RDF.time("12:00:00"), RDF.true}, - {RDF.time("12:00:00"), RDF.integer(42)}, - {RDF.time("12:00:00"), RDF.decimal(3.14)}, - ], &assert_incomparable/1 + describe "incomparable " do + test "when comparing incomparable types" do + Enum.each [ + {RDF.string("http://example.com/"), RDF.iri("http://example.com/")}, + {RDF.string("foo"), RDF.bnode("foo")}, + {RDF.string("true"), RDF.true}, + {RDF.string("42"), RDF.integer(42)}, + {RDF.string("3.14"), RDF.decimal(3.14)}, + {RDF.string("2002-04-02T12:00:00"), RDF.date_time("2002-04-02T12:00:00")}, + {RDF.string("2002-04-02"), RDF.date("2002-04-02")}, + {RDF.string("12:00:00"), RDF.time("12:00:00")}, + {RDF.false, nil}, + {RDF.true, RDF.integer(42)}, + {RDF.true, RDF.decimal(3.14)}, + {RDF.date_time("2002-04-02T12:00:00"), RDF.true}, + {RDF.date_time("2002-04-02T12:00:00"), RDF.integer(42)}, + {RDF.date_time("2002-04-02T12:00:00"), RDF.decimal(3.14)}, + {RDF.date("2002-04-02"), RDF.true}, + {RDF.date("2002-04-02"), RDF.integer(42)}, + {RDF.date("2002-04-02"), RDF.decimal(3.14)}, + {RDF.time("12:00:00"), RDF.true}, + {RDF.time("12:00:00"), RDF.integer(42)}, + {RDF.time("12:00:00"), RDF.decimal(3.14)}, + ], &assert_incomparable/1 + end + + test 
"when comparing invalid literals" do + Enum.each [ + {RDF.true, RDF.boolean(42)}, + {RDF.date_time("2002-04-02T12:00:00"), RDF.date_time("2002.04.02 12:00")}, + {RDF.date("2002-04-02"), RDF.date("2002.04.02")}, + {RDF.time("12:00:00"), RDF.time("12-00-00")}, + ], &assert_incomparable/1 + end end @@ -200,6 +232,17 @@ defmodule RDF.LiteralComparisonTest do assert_less_than({right, left}, nil) end + defp assert_indeterminate({left, right}) do + assert_compare_result({left, right}, :indeterminate) + assert_compare_result({right, left}, :indeterminate) + + assert_greater_than({left, right}, false) + assert_greater_than({right, left}, false) + + assert_less_than({left, right}, false) + assert_less_than({right, left}, false) + end + defp assert_compare_result({left, right}, expected) do result = RDF.Literal.compare(left, right) assert result == expected, """