defmodule RDF.XSD.Datatype do
  @moduledoc """
  A behaviour for XSD datatypes.

  An XSD datatype has three properties:

  - A _value space_, which is a set of values.
  - A _lexical space_, which is a set of _literals_ used to denote the values.
  - A collection of functions associated with the datatype.

  ### Builtin XSD datatypes

  RDF.ex comes with the following builtin implementations of XSD datatypes:

  | `xsd:boolean` | `RDF.XSD.Boolean` |
  | `xsd:float` | `RDF.XSD.Float` |
  | `xsd:double` | `RDF.XSD.Double` |
  | `xsd:decimal` | `RDF.XSD.Decimal` |
  | `xsd:integer` | `RDF.XSD.Integer` |
  | `xsd:long` | `RDF.XSD.Long` |
  | `xsd:int` | `RDF.XSD.Int` |
  | `xsd:short` | `RDF.XSD.Short` |
  | `xsd:byte` | `RDF.XSD.Byte` |
  | `xsd:nonPositiveInteger` | `RDF.XSD.NonPositiveInteger` |
  | `xsd:negativeInteger` | `RDF.XSD.NegativeInteger` |
  | `xsd:nonNegativeInteger` | `RDF.XSD.NonNegativeInteger` |
  | `xsd:positiveInteger` | `RDF.XSD.PositiveInteger` |
  | `xsd:unsignedLong` | `RDF.XSD.UnsignedLong` |
  | `xsd:unsignedInt` | `RDF.XSD.UnsignedInt` |
  | `xsd:unsignedShort` | `RDF.XSD.UnsignedShort` |
  | `xsd:unsignedByte` | `RDF.XSD.UnsignedByte` |
  | `xsd:string` | `RDF.XSD.String` |
  | `xsd:normalizedString` | ❌ |
  | `xsd:token` | ❌ |
  | `xsd:language` | ❌ |
  | `xsd:Name` | ❌ |
  | `xsd:NCName` | ❌ |
  | `xsd:ID` | ❌ |
  | `xsd:IDREF` | ❌ |
  | `xsd:ENTITY` | ❌ |
  | `xsd:NMTOKEN` | ❌ |
  | `xsd:dateTime` | `RDF.XSD.DateTime` |
  | `xsd:dateTimeStamp` | ❌ |
  | `xsd:date` | `RDF.XSD.Date` |
  | `xsd:time` | `RDF.XSD.Time` |
  | `xsd:duration` | ❌ |
  | `xsd:dayTimeDuration` | ❌ |
  | `xsd:yearMonthDuration` | ❌ |
  | `xsd:gYearMonth` | ❌ |
  | `xsd:gYear` | ❌ |
  | `xsd:gMonthDay` | ❌ |
  | `xsd:gDay` | ❌ |
  | `xsd:gMonth` | ❌ |
  | `xsd:base64Binary` | ❌ |
  | `xsd:hexBinary` | ❌ |
  | `xsd:anyURI` | `RDF.XSD.AnyURI` |
  | `xsd:QName` | ❌ |
  | `xsd:NOTATION` | ❌ |

  There are some notable differences in the implementations of some datatypes compared to
  the original spec:

  - `RDF.XSD.Integer` is not derived from `RDF.XSD.Decimal`, but implemented as a
    primitive datatype
  - `RDF.XSD.Float` is not implemented as a primitive datatype, but derived from
    `RDF.XSD.Double` without further restrictions instead, since Erlang doesn't
    have a corresponding datatype

  See the built-in datatypes section of the W3C XSD specification for the original
  definitions.
  """

  @type t :: module

  @type uncanonical_lexical :: String.t() | nil

  @type literal :: %{
          :__struct__ => t(),
          :value => any(),
          :uncanonical_lexical => uncanonical_lexical()
        }

  import RDF.Utils.Guards

  @doc """
  Returns if the `RDF.XSD.Datatype` is a primitive datatype.
  """
  @callback primitive?() :: boolean

  @doc """
  The base datatype from which a `RDF.XSD.Datatype` is derived.

  Note: Since this library focuses on atomic types and the special `xsd:anyAtomicType`
  specified as the base type of all primitive types in the W3C spec wouldn't serve any
  purpose here, all primitive datatypes just return `nil` instead.
  """
  @callback base :: t() | nil

  @doc """
  The primitive `RDF.XSD.Datatype` from which a `RDF.XSD.Datatype` is derived.

  In case of a primitive `RDF.XSD.Datatype` this function returns this `RDF.XSD.Datatype` itself.
  """
  @callback base_primitive :: t()

  @doc """
  Checks if the `RDF.XSD.Datatype` is directly or indirectly derived from the given `RDF.XSD.Datatype`.

  Note that this is just a basic datatype reflection function on the module level
  and does not work with `RDF.Literal`s. See `c:RDF.Literal.Datatype.datatype?/1` instead.
  """
  @callback derived_from?(t()) :: boolean

  @doc """
  The set of applicable facets of a `RDF.XSD.Datatype`.
  """
  @callback applicable_facets :: [RDF.XSD.Facet.t()]

  @doc """
  A mapping from the lexical space of a `RDF.XSD.Datatype` into its value space.
  """
  @callback lexical_mapping(String.t(), Keyword.t()) :: any

  @doc """
  A mapping from Elixir values into the value space of a `RDF.XSD.Datatype`.

  If the Elixir mapping for the given value can not be mapped into value space of
  the XSD datatype an implementation should return `@invalid_value`
  (which is just `nil` at the moment, so `nil` is never a valid value of a value space).

  Otherwise a tuple `{value, lexical}` with `value` being the internal representation
  of the mapped value from the value space and `lexical` being the lexical representation
  to be used for the Elixir value or `nil` if `c:init_valid_lexical/3` should be used
  to determine the lexical form in general (i.e. also when initialized with a string
  via the `c:lexical_mapping/2`). Since the latter case is most often what you want,
  you can also return `value` directly, as long as it is not a two element tuple.
  """
  @callback elixir_mapping(any, Keyword.t()) :: any | {any, uncanonical_lexical}

  @doc """
  Returns the standard lexical representation for a value of the value space of a `RDF.XSD.Datatype`.
  """
  @callback canonical_mapping(any) :: String.t()

  @doc """
  Produces the lexical representation to be used for a `RDF.XSD.Datatype` literal.

  By default the lexical representation of a `RDF.XSD.Datatype` is either the
  canonical form in case it is created from a non-string Elixir value or, if it
  is created from a string, just with that string as the lexical form.

  But there can be various reasons for why this should be different for certain
  datatypes. For example, for `RDF.XSD.Double`s given as Elixir floats, we want the
  default lexical representation to be the decimal and not the canonical
  exponential form. Another reason might be that additional options are given
  which should be taken into account in the lexical form.

  If the lexical representation for a given `value` and `lexical` should be the
  canonical one, an implementation should return `nil`.
  """
  @callback init_valid_lexical(any, uncanonical_lexical, Keyword.t()) :: uncanonical_lexical

  @doc """
  Produces the lexical representation of an invalid value.

  The default implementation of the `__using__` macro just returns the `to_string/1`
  representation of the value.
  """
  @callback init_invalid_lexical(any, Keyword.t()) :: String.t()

  @doc """
  Returns the `RDF.XSD.Datatype` for a datatype IRI.
  """
  defdelegate get(id), to: RDF.Literal.Datatype.Registry, as: :xsd_datatype

  # Returns the more specific (i.e. more derived) of two datatype modules, or
  # `nil` when neither of them is the same as or derived from the other.
  @doc false
  def most_specific(left, right)
  def most_specific(datatype, datatype), do: datatype

  def most_specific(left, right) do
    cond do
      # `right` is `left` itself or derived from it, so `right` is the narrower type.
      left.datatype?(right) -> right
      right.datatype?(left) -> left
      true -> nil
    end
  end

  # Injects the struct, the shared types and the default implementations of the
  # `RDF.Literal.Datatype` functions into a module implementing this behaviour.
  # `opts` are passed on unchanged to `use RDF.Literal.Datatype`.
  defmacro __using__(opts) do
    quote do
      # `value` holds the mapped value (`nil` for invalid literals);
      # `uncanonical_lexical` is only set when the lexical form differs from the
      # canonical one (see `build_valid/3`) or when the literal is invalid
      # (see `build_invalid/2`).
      defstruct [:value, :uncanonical_lexical]

      @behaviour unquote(__MODULE__)

      use RDF.Literal.Datatype, unquote(opts)

      @invalid_value nil

      @type invalid_value :: nil
      # NOTE(review): `valid_value` is not defined in this macro; it has to be
      # provided by the using module (or by `use RDF.Literal.Datatype`) - confirm.
      @type value :: valid_value | invalid_value

      @type t :: %__MODULE__{
              value: value,
              uncanonical_lexical: RDF.XSD.Datatype.uncanonical_lexical()
            }

      # `!` applied to the (truthy) heredoc string evaluates to `false`, so this
      # is effectively `@doc false` while keeping the explanatory text next to
      # the function.
      @doc !"""
           This function is just used to check if a module is a RDF.XSD.Datatype.

           See `RDF.Literal.Datatype.Registry.is_xsd_datatype?/1`.
           """
      def __xsd_datatype_indicator__, do: true

      @doc """
      Checks if the given literal has datatype this or a datatype that is derived of it.
      """
      @impl RDF.Literal.Datatype
      # Clause order matters: wrapped literals and structs are unwrapped down to
      # their datatype module before the module-level checks are applied.
      def datatype?(%RDF.Literal{literal: literal}), do: datatype?(literal)
      def datatype?(%datatype{}), do: datatype?(datatype)
      def datatype?(__MODULE__), do: true

      def datatype?(datatype) when maybe_module(datatype) do
        RDF.XSD.datatype?(datatype) and datatype.derived_from?(__MODULE__)
      end

      def datatype?(_), do: false

      # Asserting variant of `datatype?/1` for structs: returns `true` or raises
      # `RDF.XSD.Datatype.Mismatch`. Anything that is not a struct always raises.
      @doc false
      def datatype!(%__MODULE__{}), do: true

      def datatype!(%datatype{} = literal) do
        datatype?(datatype) ||
          raise RDF.XSD.Datatype.Mismatch, value: literal, expected_type: __MODULE__
      end

      def datatype!(value),
        do: raise(RDF.XSD.Datatype.Mismatch, value: value, expected_type: __MODULE__)

      @doc """
      Creates a new `RDF.Literal` with this datatype and the given `value`.
      """
      # Dialyzer causes a warning on all primitives since the facet_conform?/2 call
      # always returns true there, so the other branch is unnecessary. This could
      # be fixed by generating a special version for primitives, but it's not worth
      # maintaining different versions of this function which must be kept in-sync.
      @dialyzer {:nowarn_function, new: 2}
      @impl RDF.Literal.Datatype
      def new(value, opts \\ [])

      # Binaries are treated as lexical forms and go through `lexical_mapping/2`.
      def new(lexical, opts) when is_binary(lexical) do
        case lexical_mapping(lexical, opts) do
          @invalid_value ->
            build_invalid(lexical, opts)

          value ->
            if facet_conform?(value, lexical) do
              build_valid(value, lexical, opts)
            else
              build_invalid(lexical, opts)
            end
        end
      end

      # All other Elixir values go through `elixir_mapping/2`.
      def new(value, opts) do
        case elixir_mapping(value, opts) do
          @invalid_value ->
            build_invalid(value, opts)

          value ->
            # `elixir_mapping/2` may return either `{value, lexical}` or the bare
            # value; normalize both shapes to a `{value, lexical}` pair.
            {value, lexical} =
              case value do
                {value, lexical} -> {value, lexical}
                value -> {value, nil}
              end

            if facet_conform?(value, lexical) do
              build_valid(value, lexical, opts)
            else
              # NOTE: `value` was rebound above, so the mapped value (not the
              # originally given argument) is used for the invalid literal here.
              build_invalid(value, opts)
            end
        end
      end

      @doc """
      Creates a new `RDF.Literal` with this datatype and the given `value` or fails when it is not valid.
      """
      @impl RDF.Literal.Datatype
      def new!(value, opts \\ []) do
        literal = new(value, opts)

        if valid?(literal) do
          literal
        else
          raise ArgumentError, "#{inspect(value)} is not a valid #{inspect(__MODULE__)}"
        end
      end

      # Builds the literal for a valid `value`. With a truthy `:canonicalize`
      # option no lexical form is stored at all. Otherwise the lexical form
      # produced by `init_valid_lexical/3` is stored only if it is set and
      # differs from the canonical form of `value`.
      # NOTE(review): `literal/1` is not defined in this file; presumably it is
      # injected by `use RDF.Literal.Datatype` and wraps the struct in a
      # `RDF.Literal` - confirm.
      @doc false
      @spec build_valid(any, RDF.XSD.Datatype.uncanonical_lexical(), Keyword.t()) ::
              RDF.Literal.t()
      def build_valid(value, lexical, opts) do
        if Keyword.get(opts, :canonicalize) do
          literal(%__MODULE__{value: value})
        else
          initial_lexical = init_valid_lexical(value, lexical, opts)

          literal(%__MODULE__{
            value: value,
            uncanonical_lexical:
              if(initial_lexical && initial_lexical != canonical_mapping(value),
                do: initial_lexical
              )
          })
        end
      end

      # Builds the literal for an invalid input: `value` stays `nil` and only the
      # lexical form produced by `init_invalid_lexical/2` is kept.
      @dialyzer {:nowarn_function, build_invalid: 2}
      defp build_invalid(lexical, opts) do
        literal(%__MODULE__{uncanonical_lexical: init_invalid_lexical(lexical, opts)})
      end

      @doc """
      Returns the value of a `RDF.Literal` of this or a derived datatype.
      """
      @impl RDF.Literal.Datatype
      def value(%RDF.Literal{literal: literal}), do: value(literal)
      def value(%__MODULE__{} = literal), do: literal.value

      def value(literal) do
        # Raises `RDF.XSD.Datatype.Mismatch` unless `literal` is a struct of a
        # derived datatype.
        datatype!(literal)
        literal.value
      end

      @doc """
      Returns the lexical form of a `RDF.Literal` of this datatype.
      """
      @impl RDF.Literal.Datatype
      def lexical(lexical)
      def lexical(%RDF.Literal{literal: literal}), do: lexical(literal)

      # Without a stored lexical form the canonical form is derived from the value.
      def lexical(%__MODULE__{value: value, uncanonical_lexical: nil}),
        do: canonical_mapping(value)

      def lexical(%__MODULE__{uncanonical_lexical: lexical}), do: lexical

      @doc """
      Returns the canonical lexical form of a `RDF.Literal` of this datatype.
      """
      @impl RDF.Literal.Datatype
      def canonical_lexical(%RDF.Literal{literal: literal}), do: canonical_lexical(literal)

      def canonical_lexical(%__MODULE__{value: value}) when not is_nil(value),
        do: canonical_mapping(value)

      # Invalid literals (and anything else) have no canonical lexical form.
      def canonical_lexical(_), do: nil

      @doc """
      Produces the canonical representation of a `RDF.Literal` of this datatype.
      """
      @impl RDF.Literal.Datatype
      def canonical(literal)

      # Already canonical: returned unchanged.
      def canonical(%RDF.Literal{literal: %__MODULE__{uncanonical_lexical: nil}} = literal),
        do: literal

      # Invalid literals can not be canonicalized: returned unchanged.
      def canonical(%RDF.Literal{literal: %__MODULE__{value: @invalid_value}} = literal),
        do: literal

      def canonical(%RDF.Literal{literal: %__MODULE__{} = literal}),
        do: canonical(literal)

      # Dropping the stored lexical form makes `lexical/1` fall back to the
      # canonical mapping of the value.
      def canonical(%__MODULE__{} = literal),
        do: literal(%__MODULE__{literal | uncanonical_lexical: nil})

      @doc """
      Determines if the lexical form of a `RDF.Literal` of this datatype is the canonical form.
      """
      @impl RDF.Literal.Datatype
      def canonical?(literal)
      def canonical?(%RDF.Literal{literal: literal}), do: canonical?(literal)
      def canonical?(%__MODULE__{uncanonical_lexical: nil}), do: true
      def canonical?(%__MODULE__{}), do: false

      @doc """
      Determines if a `RDF.Literal` of this or a derived datatype has a proper value of its value space.
      """
      @impl RDF.Literal.Datatype
      def valid?(literal)
      def valid?(%RDF.Literal{literal: literal}), do: valid?(literal)
      def valid?(%__MODULE__{value: @invalid_value}), do: false
      def valid?(%__MODULE__{}), do: true

      # Structs of other datatypes are only valid here if their datatype is
      # derived from this one and they are valid according to that datatype.
      def valid?(%datatype{} = literal),
        do: datatype?(datatype) and datatype.valid?(literal)

      def valid?(_), do: false

      # Classifies a pair of datatypes as `{:same_or_derived, datatype}` (with the
      # most specific of both) or `{:different, left_datatype}`.
      # NOTE(review): not called within this quoted block; presumably used by the
      # code injected via `use RDF.Literal.Datatype` - confirm.
      @doc false
      defp equality_path(left_datatype, right_datatype)
      defp equality_path(datatype, datatype), do: {:same_or_derived, datatype}

      defp equality_path(left_datatype, right_datatype) do
        if RDF.XSD.datatype?(left_datatype) and RDF.XSD.datatype?(right_datatype) do
          if datatype = RDF.XSD.Datatype.most_specific(left_datatype, right_datatype) do
            {:same_or_derived, datatype}
          else
            {:different, left_datatype}
          end
        else
          {:different, left_datatype}
        end
      end

      @doc """
      Compares two `RDF.Literal`s.

      If the first literal is greater than the second `:gt` is returned, if less than `:lt` is returned.
      If both literals are equal `:eq` is returned.
      If the literals can not be compared either `nil` is returned, when they generally
      can not be compared (e.g. because one of them is not a valid XSD literal), or
      `:indeterminate` is returned, when the order of the given values is not defined
      on only partially ordered datatypes.
      """
      @spec compare(RDF.Literal.t() | any, RDF.Literal.t() | any) ::
              RDF.Literal.Datatype.comparison_result() | :indeterminate | nil
      def compare(left, right)
      def compare(left, %RDF.Literal{literal: right}), do: compare(left, right)
      def compare(%RDF.Literal{literal: left}, right), do: compare(left, right)

      def compare(left, right) do
        # The `if` has no `else`, so `nil` is returned unless both sides are
        # valid XSD literals.
        # NOTE(review): `do_compare/2` is not defined in this file; it is expected
        # to come from the using module or `use RDF.Literal.Datatype` - confirm.
        if RDF.XSD.datatype?(left) and RDF.XSD.datatype?(right) and
             RDF.Literal.Datatype.valid?(left) and RDF.Literal.Datatype.valid?(right) do
          do_compare(left, right)
        end
      end

      defimpl Inspect do
        # Binds everything after the "Elixir.Inspect." prefix of the current
        # module name; the match fails at compile time if the prefix is missing.
        "Elixir.Inspect." <> datatype_name = to_string(__MODULE__)
        @datatype_name datatype_name

        # Renders the literal with its value and its lexical form, the latter
        # obtained through the `lexical/1` function of the literal's own struct
        # module.
        def inspect(literal, _opts) do
          "%#{@datatype_name}{value: #{inspect(literal.value)}, lexical: #{ literal |> literal.__struct__.lexical() |> inspect() }}"
        end
      end
    end
  end
end