rdf-ex/lib/rdf/xsd/datatype.ex
2020-06-29 10:37:42 +02:00

447 lines
15 KiB
Elixir

defmodule RDF.XSD.Datatype do
@moduledoc """
A behaviour for XSD datatypes.
A XSD datatype has three properties:
- A _value space_, which is a set of values.
- A _lexical space_, which is a set of _literals_ used to denote the values.
- A collection of functions associated with the datatype.
### Builtin XSD datatypes
RDF.ex comes with the following builtin implementations of XSD datatypes:
| `xsd:boolean` | `RDF.XSD.Boolean` |
| `xsd:float` | `RDF.XSD.Float` |
| `xsd:double` | `RDF.XSD.Double` |
| `xsd:decimal` | `RDF.XSD.Decimal` |
| `xsd:integer` | `RDF.XSD.Integer` |
| `xsd:long` | `RDF.XSD.Long` |
| `xsd:int` | `RDF.XSD.Int` |
| `xsd:short` | `RDF.XSD.Short` |
| `xsd:byte` | `RDF.XSD.Byte` |
| `xsd:nonPositiveInteger` | `RDF.XSD.NonPositiveInteger` |
| `xsd:negativeInteger` | `RDF.XSD.NegativeInteger` |
| `xsd:nonNegativeInteger` | `RDF.XSD.NonNegativeInteger` |
| `xsd:positiveInteger` | `RDF.XSD.PositiveInteger` |
| `xsd:unsignedLong` | `RDF.XSD.UnsignedLong` |
| `xsd:unsignedInt` | `RDF.XSD.UnsignedInt` |
| `xsd:unsignedShort` | `RDF.XSD.UnsignedShort` |
| `xsd:unsignedByte` | `RDF.XSD.UnsignedByte` |
| `xsd:string` | `RDF.XSD.String` |
| `xsd:normalizedString` | ❌ |
| `xsd:token` | ❌ |
| `xsd:language` | ❌ |
| `xsd:Name` | ❌ |
| `xsd:NCName` | ❌ |
| `xsd:ID` | ❌ |
| `xsd:IDREF` | ❌ |
| `xsd:ENTITY` | ❌ |
| `xsd:NMTOKEN` | ❌ |
| `xsd:dateTime` | `RDF.XSD.DateTime` |
| `xsd:dateTimeStamp` | ❌ |
| `xsd:date` | `RDF.XSD.Date` |
| `xsd:time` | `RDF.XSD.Time` |
| `xsd:duration` | ❌ |
| `xsd:dayTimeDuration` | ❌ |
| `xsd:yearMonthDuration` | ❌ |
| `xsd:gYearMonth` | ❌ |
| `xsd:gYear` | ❌ |
| `xsd:gMonthDay` | ❌ |
| `xsd:gDay` | ❌ |
| `xsd:gMonth` | ❌ |
| `xsd:base64Binary` | ❌ |
| `xsd:hexBinary` | ❌ |
| `xsd:anyURI` | `RDF.XSD.AnyURI` |
| `xsd:QName` | ❌ |
| `xsd:NOTATION` | ❌ |
There are some notable difference in the implementations of some datatypes compared to
the original spec:
- `RDF.XSD.Integer` is not derived from `RDF.XSD.Decimal`, but implemented as a primitive datatype
- `RDF.XSD.Float` is not implemented as a primitive datatype, but derived from `RDF.XSD.Double`
without further restrictions instead, since Erlang doesn't have a corresponding datatype
see <https://www.w3.org/TR/xmlschema11-2/#built-in-datatypes>
"""
@type t :: module
@type uncanonical_lexical :: String.t() | nil
@type literal :: %{
:__struct__ => t(),
:value => any(),
:uncanonical_lexical => uncanonical_lexical()
}
import RDF.Utils.Guards
@doc """
Returns if the `RDF.XSD.Datatype` is a primitive datatype.
"""
@callback primitive?() :: boolean
@doc """
The base datatype from which a `RDF.XSD.Datatype` is derived.
Note: Since this library focuses on atomic types and the special `xsd:anyAtomicType`
specified as the base type of all primitive types in the W3C spec wouldn't serve any
purpose here, all primitive datatypes just return `nil` instead.
"""
@callback base :: t() | nil
@doc """
The primitive `RDF.XSD.Datatype` from which a `RDF.XSD.Datatype` is derived.
In case of a primitive `RDF.XSD.Datatype` this function returns this `RDF.XSD.Datatype` itself.
"""
@callback base_primitive :: t()
@doc """
Checks if the `RDF.XSD.Datatype` is directly or indirectly derived from the given `RDF.XSD.Datatype`.
Note that this is just a basic datatype reflection function on the module level
and does not work with `RDF.Literal`s. See `c:RDF.Literal.Datatype.datatype?/1` instead.
"""
@callback derived_from?(t()) :: boolean
@doc """
The set of applicable facets of a `RDF.XSD.Datatype`.
"""
@callback applicable_facets :: [RDF.XSD.Facet.t()]
@doc """
A mapping from the lexical space of a `RDF.XSD.Datatype` into its value space.
"""
@callback lexical_mapping(String.t(), Keyword.t()) :: any
@doc """
A mapping from Elixir values into the value space of a `RDF.XSD.Datatype`.
If the Elixir mapping for the given value can not be mapped into value space of
the XSD datatype an implementation should return `@invalid_value`
(which is just `nil` at the moment, so `nil` is never a valid value of a value space).
Otherwise a tuple `{value, lexical}` with `value` being the internal representation
of the mapped value from the value space and `lexical` being the lexical representation
to be used for the Elixir value or `nil` if `c:init_valid_lexical/3` should be used
to determine the lexical form in general (i.e. also when initialized with a string
via the `c:lexical_mapping/2`). Since the later case is most often what you want,
you can also return `value` directly, as long as it is not a two element tuple.
"""
@callback elixir_mapping(any, Keyword.t()) :: any | {any, uncanonical_lexical}
@doc """
Returns the standard lexical representation for a value of the value space of a `RDF.XSD.Datatype`.
"""
@callback canonical_mapping(any) :: String.t()
@doc """
Produces the lexical representation to be used for a `RDF.XSD.Datatype` literal.
By default the lexical representation of a `RDF.XSD.Datatype` is either the
canonical form in case it is created from a non-string Elixir value or, if it
is created from a string, just with that string as the lexical form.
But there can be various reasons for why this should be different for certain
datatypes. For example, for `RDF.XSD.Double`s given as Elixir floats, we want the
default lexical representation to be the decimal and not the canonical
exponential form. Another reason might be that additional options are given
which should be taken into account in the lexical form.
If the lexical representation for a given `value` and `lexical` should be the
canonical one, an implementation should return `nil`.
"""
@callback init_valid_lexical(any, uncanonical_lexical, Keyword.t()) :: uncanonical_lexical
@doc """
Produces the lexical representation of an invalid value.
The default implementation of the `_using__` macro just returns the `to_string/1`
representation of the value.
"""
@callback init_invalid_lexical(any, Keyword.t()) :: String.t()
@doc """
Returns the `RDF.XSD.Datatype` for a datatype IRI.
"""
defdelegate get(id), to: RDF.Literal.Datatype.Registry, as: :xsd_datatype
@doc false
def most_specific(left, right)
def most_specific(datatype, datatype), do: datatype
def most_specific(left, right) do
cond do
left.datatype?(right) -> right
right.datatype?(left) -> left
true -> nil
end
end
defmacro __using__(opts) do
quote do
defstruct [:value, :uncanonical_lexical]
@behaviour unquote(__MODULE__)
use RDF.Literal.Datatype, unquote(opts)
@invalid_value nil
@type invalid_value :: nil
@type value :: valid_value | invalid_value
@type t :: %__MODULE__{
value: value,
uncanonical_lexical: RDF.XSD.Datatype.uncanonical_lexical()
}
@doc !"""
This function is just used to check if a module is a RDF.XSD.Datatype.
See `RDF.Literal.Datatype.Registry.is_xsd_datatype?/1`.
"""
def __xsd_datatype_indicator__, do: true
@doc """
Checks if the given literal has datatype this or a datatype that is derived of it.
"""
@impl RDF.Literal.Datatype
def datatype?(%RDF.Literal{literal: literal}), do: datatype?(literal)
def datatype?(%datatype{}), do: datatype?(datatype)
def datatype?(__MODULE__), do: true
def datatype?(datatype) when maybe_module(datatype) do
RDF.XSD.datatype?(datatype) and datatype.derived_from?(__MODULE__)
end
def datatype?(_), do: false
@doc false
def datatype!(%__MODULE__{}), do: true
def datatype!(%datatype{} = literal) do
datatype?(datatype) ||
raise RDF.XSD.Datatype.Mismatch, value: literal, expected_type: __MODULE__
end
def datatype!(value),
do: raise(RDF.XSD.Datatype.Mismatch, value: value, expected_type: __MODULE__)
@doc """
Creates a new `RDF.Literal` with this datatype and the given `value`.
"""
# Dialyzer causes a warning on all primitives since the facet_conform?/2 call
# always returns true there, so the other branch is unnecessary. This could
# be fixed by generating a special version for primitives, but it's not worth
# maintaining different versions of this function which must be kept in-sync.
@dialyzer {:nowarn_function, new: 2}
@impl RDF.Literal.Datatype
def new(value, opts \\ [])
def new(lexical, opts) when is_binary(lexical) do
case lexical_mapping(lexical, opts) do
@invalid_value ->
build_invalid(lexical, opts)
value ->
if facet_conform?(value, lexical) do
build_valid(value, lexical, opts)
else
build_invalid(lexical, opts)
end
end
end
def new(value, opts) do
case elixir_mapping(value, opts) do
@invalid_value ->
build_invalid(value, opts)
value ->
{value, lexical} =
case value do
{value, lexical} -> {value, lexical}
value -> {value, nil}
end
if facet_conform?(value, lexical) do
build_valid(value, lexical, opts)
else
build_invalid(value, opts)
end
end
end
@doc """
Creates a new `RDF.Literal` with this datatype and the given `value` or fails when it is not valid.
"""
@impl RDF.Literal.Datatype
def new!(value, opts \\ []) do
literal = new(value, opts)
if valid?(literal) do
literal
else
raise ArgumentError, "#{inspect(value)} is not a valid #{inspect(__MODULE__)}"
end
end
@doc false
@spec build_valid(any, RDF.XSD.Datatype.uncanonical_lexical(), Keyword.t()) ::
RDF.Literal.t()
def build_valid(value, lexical, opts) do
if Keyword.get(opts, :canonicalize) do
literal(%__MODULE__{value: value})
else
initial_lexical = init_valid_lexical(value, lexical, opts)
literal(%__MODULE__{
value: value,
uncanonical_lexical:
if(initial_lexical && initial_lexical != canonical_mapping(value),
do: initial_lexical
)
})
end
end
@dialyzer {:nowarn_function, build_invalid: 2}
defp build_invalid(lexical, opts) do
literal(%__MODULE__{uncanonical_lexical: init_invalid_lexical(lexical, opts)})
end
@doc """
Returns the value of a `RDF.Literal` of this or a derived datatype.
"""
@impl RDF.Literal.Datatype
def value(%RDF.Literal{literal: literal}), do: value(literal)
def value(%__MODULE__{} = literal), do: literal.value
def value(literal) do
datatype!(literal)
literal.value
end
@doc """
Returns the lexical form of a `RDF.Literal` of this datatype.
"""
@impl RDF.Literal.Datatype
def lexical(lexical)
def lexical(%RDF.Literal{literal: literal}), do: lexical(literal)
def lexical(%__MODULE__{value: value, uncanonical_lexical: nil}),
do: canonical_mapping(value)
def lexical(%__MODULE__{uncanonical_lexical: lexical}), do: lexical
@doc """
Returns the canonical lexical form of a `RDF.Literal` of this datatype.
"""
@impl RDF.Literal.Datatype
def canonical_lexical(%RDF.Literal{literal: literal}), do: canonical_lexical(literal)
def canonical_lexical(%__MODULE__{value: value}) when not is_nil(value),
do: canonical_mapping(value)
def canonical_lexical(_), do: nil
@doc """
Produces the canonical representation of a `RDF.Literal` of this datatype.
"""
@impl RDF.Literal.Datatype
def canonical(literal)
def canonical(%RDF.Literal{literal: %__MODULE__{uncanonical_lexical: nil}} = literal),
do: literal
def canonical(%RDF.Literal{literal: %__MODULE__{value: @invalid_value}} = literal),
do: literal
def canonical(%RDF.Literal{literal: %__MODULE__{} = literal}),
do: canonical(literal)
def canonical(%__MODULE__{} = literal),
do: literal(%__MODULE__{literal | uncanonical_lexical: nil})
@doc """
Determines if the lexical form of a `RDF.Literal` of this datatype is the canonical form.
"""
@impl RDF.Literal.Datatype
def canonical?(literal)
def canonical?(%RDF.Literal{literal: literal}), do: canonical?(literal)
def canonical?(%__MODULE__{uncanonical_lexical: nil}), do: true
def canonical?(%__MODULE__{}), do: false
@doc """
Determines if a `RDF.Literal` of this or a derived datatype has a proper value of its value space.
"""
@impl RDF.Literal.Datatype
def valid?(literal)
def valid?(%RDF.Literal{literal: literal}), do: valid?(literal)
def valid?(%__MODULE__{value: @invalid_value}), do: false
def valid?(%__MODULE__{}), do: true
def valid?(%datatype{} = literal),
do: datatype?(datatype) and datatype.valid?(literal)
def valid?(_), do: false
@doc false
defp equality_path(left_datatype, right_datatype)
defp equality_path(datatype, datatype), do: {:same_or_derived, datatype}
defp equality_path(left_datatype, right_datatype) do
if RDF.XSD.datatype?(left_datatype) and RDF.XSD.datatype?(right_datatype) do
if datatype = RDF.XSD.Datatype.most_specific(left_datatype, right_datatype) do
{:same_or_derived, datatype}
else
{:different, left_datatype}
end
else
{:different, left_datatype}
end
end
@doc """
Compares two `RDF.Literal`s.
If the first literal is greater than the second `:gt` is returned, if less than `:lt` is returned.
If both literal are equal `:eq` is returned.
If the literals can not be compared either `nil` is returned, when they generally can be compared
due to their datatype, or `:indeterminate` is returned, when the order of the given values is
not defined on only partially ordered datatypes.
"""
@spec compare(RDF.Literal.t() | any, RDF.Literal.t() | any) ::
RDF.Literal.Datatype.comparison_result() | :indeterminate | nil
def compare(left, right)
def compare(left, %RDF.Literal{literal: right}), do: compare(left, right)
def compare(%RDF.Literal{literal: left}, right), do: compare(left, right)
def compare(left, right) do
if RDF.XSD.datatype?(left) and RDF.XSD.datatype?(right) and
RDF.Literal.Datatype.valid?(left) and RDF.Literal.Datatype.valid?(right) do
do_compare(left, right)
end
end
defimpl Inspect do
"Elixir.Inspect." <> datatype_name = to_string(__MODULE__)
@datatype_name datatype_name
def inspect(literal, _opts) do
"%#{@datatype_name}{value: #{inspect(literal.value)}, lexical: #{
literal |> literal.__struct__.lexical() |> inspect()
}}"
end
end
end
end
end