447 lines
15 KiB
Elixir
447 lines
15 KiB
Elixir
defmodule RDF.XSD.Datatype do
|
|
@moduledoc """
|
|
A behaviour for XSD datatypes.
|
|
|
|
A XSD datatype has three properties:
|
|
|
|
- A _value space_, which is a set of values.
|
|
- A _lexical space_, which is a set of _literals_ used to denote the values.
|
|
- A collection of functions associated with the datatype.
|
|
|
|
|
|
### Builtin XSD datatypes
|
|
|
|
RDF.ex comes with the following builtin implementations of XSD datatypes:
|
|
|
|
| `xsd:boolean` | `RDF.XSD.Boolean` |
|
|
| `xsd:float` | `RDF.XSD.Float` |
|
|
| `xsd:double` | `RDF.XSD.Double` |
|
|
| `xsd:decimal` | `RDF.XSD.Decimal` |
|
|
| `xsd:integer` | `RDF.XSD.Integer` |
|
|
| `xsd:long` | `RDF.XSD.Long` |
|
|
| `xsd:int` | `RDF.XSD.Int` |
|
|
| `xsd:short` | `RDF.XSD.Short` |
|
|
| `xsd:byte` | `RDF.XSD.Byte` |
|
|
| `xsd:nonPositiveInteger` | `RDF.XSD.NonPositiveInteger` |
|
|
| `xsd:negativeInteger` | `RDF.XSD.NegativeInteger` |
|
|
| `xsd:nonNegativeInteger` | `RDF.XSD.NonNegativeInteger` |
|
|
| `xsd:positiveInteger` | `RDF.XSD.PositiveInteger` |
|
|
| `xsd:unsignedLong` | `RDF.XSD.UnsignedLong` |
|
|
| `xsd:unsignedInt` | `RDF.XSD.UnsignedInt` |
|
|
| `xsd:unsignedShort` | `RDF.XSD.UnsignedShort` |
|
|
| `xsd:unsignedByte` | `RDF.XSD.UnsignedByte` |
|
|
| `xsd:string` | `RDF.XSD.String` |
|
|
| `xsd:normalizedString` | ❌ |
|
|
| `xsd:token` | ❌ |
|
|
| `xsd:language` | ❌ |
|
|
| `xsd:Name` | ❌ |
|
|
| `xsd:NCName` | ❌ |
|
|
| `xsd:ID` | ❌ |
|
|
| `xsd:IDREF` | ❌ |
|
|
| `xsd:ENTITY` | ❌ |
|
|
| `xsd:NMTOKEN` | ❌ |
|
|
| `xsd:dateTime` | `RDF.XSD.DateTime` |
|
|
| `xsd:dateTimeStamp` | ❌ |
|
|
| `xsd:date` | `RDF.XSD.Date` |
|
|
| `xsd:time` | `RDF.XSD.Time` |
|
|
| `xsd:duration` | ❌ |
|
|
| `xsd:dayTimeDuration` | ❌ |
|
|
| `xsd:yearMonthDuration` | ❌ |
|
|
| `xsd:gYearMonth` | ❌ |
|
|
| `xsd:gYear` | ❌ |
|
|
| `xsd:gMonthDay` | ❌ |
|
|
| `xsd:gDay` | ❌ |
|
|
| `xsd:gMonth` | ❌ |
|
|
| `xsd:base64Binary` | ❌ |
|
|
| `xsd:hexBinary` | ❌ |
|
|
| `xsd:anyURI` | `RDF.XSD.AnyURI` |
|
|
| `xsd:QName` | ❌ |
|
|
| `xsd:NOTATION` | ❌ |
|
|
|
|
There are some notable differences in the implementations of some datatypes compared to
|
|
the original spec:
|
|
|
|
- `RDF.XSD.Integer` is not derived from `RDF.XSD.Decimal`, but implemented as a primitive datatype
|
|
- `RDF.XSD.Float` is not implemented as a primitive datatype, but derived from `RDF.XSD.Double`
|
|
without further restrictions instead, since Erlang doesn't have a corresponding datatype
|
|
|
|
see <https://www.w3.org/TR/xmlschema11-2/#built-in-datatypes>
|
|
"""
|
|
|
|
@typedoc """
A module implementing the `RDF.XSD.Datatype` behaviour.
"""
@type t :: module()

@typedoc """
The lexical form stored in the `uncanonical_lexical` field of a datatype struct, or `nil`.
"""
@type uncanonical_lexical :: String.t() | nil

@typedoc """
The common shape of the structs of all `RDF.XSD.Datatype` implementations.
"""
@type literal :: %{
        __struct__: t(),
        value: any(),
        uncanonical_lexical: uncanonical_lexical()
      }
|
|
|
|
import RDF.Utils.Guards
|
|
|
|
@doc """
Tells whether the `RDF.XSD.Datatype` is a primitive datatype.
"""
@callback primitive?() :: boolean()
|
|
|
|
@doc """
Returns the base datatype a `RDF.XSD.Datatype` is derived from.

Primitive datatypes return `nil`: since this library focuses on atomic types, the
special `xsd:anyAtomicType`, which the W3C spec defines as the base type of all
primitive types, would not serve any purpose here.
"""
@callback base() :: t() | nil
|
|
|
|
@doc """
Returns the primitive `RDF.XSD.Datatype` a `RDF.XSD.Datatype` is derived from.

A primitive `RDF.XSD.Datatype` returns itself.
"""
@callback base_primitive() :: t()
|
|
|
|
@doc """
Checks whether the `RDF.XSD.Datatype` is derived, directly or indirectly, from the given `RDF.XSD.Datatype`.

This is only a basic reflection function on the module level; it does not work
with `RDF.Literal`s. Use `c:RDF.Literal.Datatype.datatype?/1` for those.
"""
@callback derived_from?(t()) :: boolean()
|
|
|
|
@doc """
Returns the facets which are applicable to a `RDF.XSD.Datatype`.
"""
@callback applicable_facets() :: [RDF.XSD.Facet.t()]
|
|
|
|
@doc """
Maps a literal from the lexical space of a `RDF.XSD.Datatype` into its value space.
"""
@callback lexical_mapping(String.t(), Keyword.t()) :: any()
|
|
|
|
@doc """
Maps an Elixir value into the value space of a `RDF.XSD.Datatype`.

When the given Elixir value cannot be mapped into the value space of the XSD
datatype, an implementation should return `@invalid_value` (which is just `nil`
at the moment, so `nil` is never a valid value of a value space).

Otherwise it should return a tuple `{value, lexical}`, where `value` is the
internal representation of the mapped value from the value space and `lexical`
is the lexical representation to be used for the Elixir value. `lexical` may be
`nil` if `c:init_valid_lexical/3` should be used to determine the lexical form in
general (i.e. also when initialized with a string via `c:lexical_mapping/2`).
Since the latter case is most often what you want, `value` can also be returned
directly, as long as it is not a two-element tuple.
"""
@callback elixir_mapping(any(), Keyword.t()) :: any() | {any(), uncanonical_lexical()}
|
|
|
|
@doc """
Returns the standard lexical representation of a value from the value space of a `RDF.XSD.Datatype`.
"""
@callback canonical_mapping(any()) :: String.t()
|
|
|
|
@doc """
Produces the lexical representation to be used for a `RDF.XSD.Datatype` literal.

By default, a `RDF.XSD.Datatype` literal created from a non-string Elixir value
gets the canonical form as its lexical representation, while a literal created
from a string keeps that string as its lexical form.

Certain datatypes may need to deviate from this. For example, for
`RDF.XSD.Double`s given as Elixir floats, the default lexical representation
should be the decimal and not the canonical exponential form. Additional options
which should be taken into account in the lexical form are another reason.

An implementation should return `nil` if the canonical form should be used as
the lexical representation for the given `value` and `lexical`.
"""
@callback init_valid_lexical(any(), uncanonical_lexical(), Keyword.t()) ::
            uncanonical_lexical()
|
|
|
|
@doc """
Produces the lexical representation of an invalid value.

The default implementation of the `__using__` macro just returns the
`to_string/1` representation of the value.
"""
@callback init_invalid_lexical(any(), Keyword.t()) :: String.t()
|
|
|
|
@doc """
Looks up the `RDF.XSD.Datatype` for a datatype IRI.

Delegates to `RDF.Literal.Datatype.Registry.xsd_datatype/1`.
"""
defdelegate get(id), as: :xsd_datatype, to: RDF.Literal.Datatype.Registry
|
|
|
|
# Picks the more specific of two datatype modules: when one of them reports
# (via its `datatype?/1`) that the other one is of its kind, that other one is
# returned. Returns `nil` when neither is the case.
@doc false
def most_specific(left, right)

# Identical datatypes need no further checks.
def most_specific(same, same), do: same

def most_specific(left, right) do
  if left.datatype?(right) do
    right
  else
    if right.datatype?(left), do: left, else: nil
  end
end
|
|
|
|
defmacro __using__(opts) do
|
|
quote do
|
|
# The struct of a datatype holds the value and the lexical form; both
# fields default to `nil`.
defstruct value: nil, uncanonical_lexical: nil

@behaviour unquote(__MODULE__)
use RDF.Literal.Datatype, unquote(opts)

# Marker used for values which are not part of the value space.
@invalid_value nil

@type invalid_value :: nil
@type value :: valid_value() | invalid_value()

@type t :: %__MODULE__{
        value: value(),
        uncanonical_lexical: RDF.XSD.Datatype.uncanonical_lexical()
      }
|
|
|
|
# This function is just used to check if a module is a RDF.XSD.Datatype.
#
# See `RDF.Literal.Datatype.Registry.is_xsd_datatype?/1`.
@doc false
def __xsd_datatype_indicator__ do
  true
end
|
|
|
|
@doc """
Checks whether the given literal or datatype module is of this datatype or of a datatype derived from it.

`RDF.Literal`s and datatype structs are unwrapped to their datatype module first.
Anything that is neither of those nor a module yields `false`.
"""
@impl RDF.Literal.Datatype
def datatype?(%RDF.Literal{literal: inner}), do: datatype?(inner)
def datatype?(%module{}), do: datatype?(module)
def datatype?(__MODULE__), do: true

def datatype?(module) when maybe_module(module) do
  # Only XSD datatypes are asked for their derivation.
  RDF.XSD.datatype?(module) and module.derived_from?(__MODULE__)
end

def datatype?(_other), do: false
|
|
|
|
# Asserting variant of `datatype?/1`: raises a `RDF.XSD.Datatype.Mismatch`
# unless the given struct is of this datatype or of a derived one.
@doc false
def datatype!(%__MODULE__{}), do: true

def datatype!(%datatype{} = literal) do
  case datatype?(datatype) do
    result when result in [false, nil] ->
      raise(RDF.XSD.Datatype.Mismatch, value: literal, expected_type: __MODULE__)

    result ->
      result
  end
end

# Anything which is not a struct can never be of this datatype.
def datatype!(value) do
  raise(RDF.XSD.Datatype.Mismatch, value: value, expected_type: __MODULE__)
end
|
|
|
|
@doc """
Creates a new `RDF.Literal` with this datatype and the given `value`.

Binaries are treated as lexical forms and mapped with `lexical_mapping/2`; all
other values are mapped with `elixir_mapping/2`. If the mapping yields the
invalid value, or the mapped value does not pass `facet_conform?/2`, an invalid
literal is built.
"""
# Dialyzer causes a warning on all primitives since the facet_conform?/2 call
# always returns true there, so the other branch is unnecessary. This could
# be fixed by generating a special version for primitives, but it's not worth
# maintaining different versions of this function which must be kept in-sync.
@dialyzer {:nowarn_function, new: 2}
@impl RDF.Literal.Datatype
def new(value, opts \\ [])

def new(lexical, opts) when is_binary(lexical) do
  mapped = lexical_mapping(lexical, opts)

  if mapped != @invalid_value and facet_conform?(mapped, lexical) do
    build_valid(mapped, lexical, opts)
  else
    build_invalid(lexical, opts)
  end
end

def new(value, opts) do
  case elixir_mapping(value, opts) do
    @invalid_value ->
      build_invalid(value, opts)

    mapping ->
      # The mapping is either a `{value, lexical}` tuple or a bare value.
      {mapped, lexical} = if match?({_, _}, mapping), do: mapping, else: {mapping, nil}

      if facet_conform?(mapped, lexical) do
        build_valid(mapped, lexical, opts)
      else
        # The invalid literal is built from the mapped value, not the input.
        build_invalid(mapped, opts)
      end
  end
end
|
|
|
|
@doc """
Creates a new `RDF.Literal` with this datatype and the given `value`, or raises an `ArgumentError` when the result is not valid.
"""
@impl RDF.Literal.Datatype
def new!(value, opts \\ []) do
  created = new(value, opts)

  valid?(created) ||
    raise(ArgumentError, "#{inspect(value)} is not a valid #{inspect(__MODULE__)}")

  created
end
|
|
|
|
# Builds the literal for a valid `value`.
#
# With a truthy `:canonicalize` option no lexical form is kept. Otherwise the
# result of `init_valid_lexical/3` is kept, but only when it differs from the
# canonical form of the value.
@doc false
@spec build_valid(any, RDF.XSD.Datatype.uncanonical_lexical(), Keyword.t()) ::
        RDF.Literal.t()
def build_valid(value, lexical, opts) do
  uncanonical_lexical =
    if Keyword.get(opts, :canonicalize) do
      nil
    else
      case init_valid_lexical(value, lexical, opts) do
        absent when absent in [nil, false] ->
          nil

        initial_lexical ->
          if initial_lexical != canonical_mapping(value), do: initial_lexical, else: nil
      end
    end

  literal(%__MODULE__{value: value, uncanonical_lexical: uncanonical_lexical})
end
|
|
|
|
# Builds an invalid literal: it has no value, only the lexical form produced
# by `init_invalid_lexical/2`.
@dialyzer {:nowarn_function, build_invalid: 2}
defp build_invalid(lexical, opts) do
  invalid_lexical = init_invalid_lexical(lexical, opts)

  literal(%__MODULE__{uncanonical_lexical: invalid_lexical})
end
|
|
|
|
@doc """
Returns the value of a `RDF.Literal` of this or a derived datatype.

For anything else `datatype!/1` raises a `RDF.XSD.Datatype.Mismatch`.
"""
@impl RDF.Literal.Datatype
def value(%RDF.Literal{literal: inner}), do: value(inner)
def value(%__MODULE__{value: value}), do: value

def value(other) do
  # Raises unless `other` is a literal of a derived datatype.
  datatype!(other)

  other.value
end
|
|
|
|
@doc """
Returns the lexical form of a `RDF.Literal` of this datatype.

This is the stored uncanonical lexical form when there is one, and the canonical
form of the value otherwise.
"""
@impl RDF.Literal.Datatype
def lexical(lexical)

def lexical(%RDF.Literal{literal: inner}), do: lexical(inner)

def lexical(%__MODULE__{value: value, uncanonical_lexical: uncanonical}) do
  if is_nil(uncanonical) do
    canonical_mapping(value)
  else
    uncanonical
  end
end
|
|
|
|
@doc """
Returns the canonical lexical form of a `RDF.Literal` of this datatype.

Returns `nil` for literals without a value and for everything which is not a
literal of this datatype.
"""
@impl RDF.Literal.Datatype
def canonical_lexical(%RDF.Literal{literal: inner}), do: canonical_lexical(inner)
def canonical_lexical(%__MODULE__{value: nil}), do: nil
def canonical_lexical(%__MODULE__{value: value}), do: canonical_mapping(value)
def canonical_lexical(_other), do: nil
|
|
|
|
@doc """
Produces the canonical representation of a `RDF.Literal` of this datatype.

Literals which have no uncanonical lexical form, and invalid literals, are
returned unchanged.
"""
@impl RDF.Literal.Datatype
def canonical(literal)

def canonical(%RDF.Literal{literal: %__MODULE__{} = inner} = literal) do
  if is_nil(inner.uncanonical_lexical) or inner.value == @invalid_value do
    literal
  else
    canonical(inner)
  end
end

# Dropping the uncanonical lexical form leaves the canonical one.
def canonical(%__MODULE__{} = inner) do
  literal(%{inner | uncanonical_lexical: nil})
end
|
|
|
|
@doc """
Determines whether the lexical form of a `RDF.Literal` of this datatype is the canonical form.

That is the case exactly when no uncanonical lexical form is stored.
"""
@impl RDF.Literal.Datatype
def canonical?(literal)
def canonical?(%RDF.Literal{literal: inner}), do: canonical?(inner)
def canonical?(%__MODULE__{uncanonical_lexical: uncanonical}), do: is_nil(uncanonical)
|
|
|
|
@doc """
Determines whether a `RDF.Literal` of this or a derived datatype has a proper value of its value space.

Literals of derived datatypes are checked by their own datatype module.
Everything which is not a struct is not valid.
"""
@impl RDF.Literal.Datatype
def valid?(literal)
def valid?(%RDF.Literal{literal: inner}), do: valid?(inner)
def valid?(%__MODULE__{value: value}), do: not is_nil(value)

def valid?(%module{} = literal) do
  datatype?(module) and module.valid?(literal)
end

def valid?(_other), do: false
|
|
|
|
# Classifies a pair of datatypes: `{:same_or_derived, datatype}` with the
# most specific of the two when they are identical or when
# `RDF.XSD.Datatype.most_specific/2` finds one, `{:different, left_datatype}`
# otherwise.
@doc false
defp equality_path(left_datatype, right_datatype)
defp equality_path(datatype, datatype), do: {:same_or_derived, datatype}

defp equality_path(left_datatype, right_datatype) do
  both_xsd? = RDF.XSD.datatype?(left_datatype) and RDF.XSD.datatype?(right_datatype)

  most_specific =
    if both_xsd?, do: RDF.XSD.Datatype.most_specific(left_datatype, right_datatype)

  if most_specific do
    {:same_or_derived, most_specific}
  else
    {:different, left_datatype}
  end
end
|
|
|
|
@doc """
Compares two `RDF.Literal`s.

Returns `:gt` if the first literal is greater than the second, `:lt` if it is
less, and `:eq` if both literals are equal. `:indeterminate` is returned when
the order of the given values is not defined on only partially ordered datatypes.

Returns `nil` unless both arguments are valid literals of XSD datatypes.
"""
@spec compare(RDF.Literal.t() | any, RDF.Literal.t() | any) ::
        RDF.Literal.Datatype.comparison_result() | :indeterminate | nil
def compare(left, right)

# Unwrap `RDF.Literal`s on either side first.
def compare(left, %RDF.Literal{literal: inner}), do: compare(left, inner)
def compare(%RDF.Literal{literal: inner}, right), do: compare(inner, right)

def compare(left, right) do
  comparable? =
    RDF.XSD.datatype?(left) and RDF.XSD.datatype?(right) and
      RDF.Literal.Datatype.valid?(left) and RDF.Literal.Datatype.valid?(right)

  if comparable? do
    do_compare(left, right)
  else
    nil
  end
end
|
|
|
|
defimpl Inspect do
  # Inside this implementation `__MODULE__` is the datatype module prefixed
  # with `Inspect.`; the prefix is stripped once at compile time.
  "Elixir.Inspect." <> datatype_name = to_string(__MODULE__)
  @datatype_name datatype_name

  # Renders the literal with its value and its lexical form.
  def inspect(literal, _opts) do
    value = Kernel.inspect(literal.value)
    lexical = Kernel.inspect(literal.__struct__.lexical(literal))

    "%#{@datatype_name}{value: #{value}, lexical: #{lexical}}"
  end
end
|
|
end
|
|
end
|
|
end
|