447 lines
13 KiB
Elixir
447 lines
13 KiB
Elixir
defmodule RDF.Literal do
|
|
@moduledoc """
|
|
RDF literals are leaf nodes of a RDF graph containing raw data, like strings and numbers.
|
|
"""
|
|
|
|
alias RDF.Datatype.NS.XSD
|
|
alias RDF.IRI
|
|
|
|
@type literal_value ::
|
|
RDF.Boolean.value
|
|
| RDF.Integer.value
|
|
| RDF.Double.value
|
|
| RDF.String.value
|
|
| RDF.Decimal.value
|
|
| RDF.Date.value
|
|
| RDF.Time.value
|
|
| RDF.DateTime.value
|
|
|
|
@type t :: %__MODULE__{
|
|
value: literal_value,
|
|
datatype: IRI.t,
|
|
uncanonical_lexical: String.t | nil,
|
|
language: String.t | nil
|
|
}
|
|
|
|
defstruct [:value, :datatype, :uncanonical_lexical, :language]
|
|
|
|
# to be able to pattern-match on plain types; we can't use RDF.Literal.Guards here since these aren't compiled here yet
|
|
@xsd_string XSD.string
|
|
@lang_string RDF.iri("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString")
|
|
@plain_types [@xsd_string, @lang_string]
|
|
|
|
|
|
@doc """
|
|
Creates a new `RDF.Literal` of the given value and tries to infer an appropriate XSD datatype.
|
|
|
|
Note: The `RDF.literal` function is a shortcut to this function.
|
|
|
|
The following mapping of Elixir types to XSD datatypes is applied:
|
|
|
|
| Elixir datatype | XSD datatype |
|
|
| :-------------- | :------------- |
|
|
| `string` | `xsd:string` |
|
|
| `boolean` | `xsd:boolean` |
|
|
| `integer` | `xsd:integer` |
|
|
| `float` | `xsd:double` |
|
|
| `Time` | `xsd:time` |
|
|
| `Date` | `xsd:date` |
|
|
| `DateTime` | `xsd:dateTime` |
|
|
| `NaiveDateTime` | `xsd:dateTime` |
|
|
|
|
|
|
## Examples
|
|
|
|
iex> RDF.Literal.new(42)
|
|
%RDF.Literal{value: 42, datatype: XSD.integer}
|
|
|
|
"""
|
|
@spec new(literal_value | t) :: t
|
|
def new(value)
|
|
|
|
def new(%RDF.Literal{} = literal), do: literal
|
|
|
|
def new(value) when is_binary(value), do: RDF.String.new(value)
|
|
def new(value) when is_boolean(value), do: RDF.Boolean.new(value)
|
|
def new(value) when is_integer(value), do: RDF.Integer.new(value)
|
|
def new(value) when is_float(value), do: RDF.Double.new(value)
|
|
def new(%Decimal{} = value), do: RDF.Decimal.new(value)
|
|
|
|
def new(%Date{} = value), do: RDF.Date.new(value)
|
|
def new(%Time{} = value), do: RDF.Time.new(value)
|
|
def new(%DateTime{} = value), do: RDF.DateTime.new(value)
|
|
def new(%NaiveDateTime{} = value), do: RDF.DateTime.new(value)
|
|
|
|
|
|
def new(value) do
|
|
raise RDF.Literal.InvalidError, "#{inspect value} not convertible to a RDF.Literal"
|
|
end
|
|
|
|
@doc """
|
|
Creates a new `RDF.Literal` with the given datatype or language tag.
|
|
"""
|
|
@spec new(literal_value | t, map | keyword) :: t
|
|
def new(value, opts)
|
|
|
|
def new(value, opts) when is_list(opts),
|
|
do: new(value, Map.new(opts))
|
|
|
|
def new(value, %{language: nil} = opts),
|
|
do: new(value, Map.delete(opts, :language))
|
|
|
|
def new(value, %{language: _} = opts) do
|
|
if is_binary(value) do
|
|
if opts[:datatype] in [nil, @lang_string] do
|
|
RDF.LangString.new(value, opts)
|
|
else
|
|
raise ArgumentError, "datatype with language must be rdf:langString"
|
|
end
|
|
else
|
|
new(value, Map.delete(opts, :language)) # Should we raise a warning?
|
|
end
|
|
end
|
|
|
|
def new(value, %{datatype: %RDF.IRI{} = id} = opts) do
|
|
case RDF.Datatype.get(id) do
|
|
nil -> %RDF.Literal{value: value, datatype: id}
|
|
datatype -> datatype.new(value, opts)
|
|
end
|
|
end
|
|
|
|
def new(value, %{datatype: datatype} = opts),
|
|
do: new(value, %{opts | datatype: RDF.iri(datatype)})
|
|
|
|
def new(value, opts) when is_map(opts) and map_size(opts) == 0,
|
|
do: new(value)
|
|
|
|
|
|
@doc """
|
|
Creates a new `RDF.Literal`, but fails if it's not valid.
|
|
|
|
Note: Validation is only possible if an `RDF.Datatype` with an implementation of
|
|
`RDF.Datatype.valid?/1` exists.
|
|
|
|
## Examples
|
|
|
|
iex> RDF.Literal.new!("3.14", datatype: XSD.double) == RDF.Literal.new("3.14", datatype: XSD.double)
|
|
true
|
|
|
|
iex> RDF.Literal.new!("invalid", datatype: "http://example/unkown_datatype") == RDF.Literal.new("invalid", datatype: "http://example/unkown_datatype")
|
|
true
|
|
|
|
iex> RDF.Literal.new!("foo", datatype: XSD.integer)
|
|
** (RDF.Literal.InvalidError) invalid RDF.Literal: %RDF.Literal{value: nil, lexical: "foo", datatype: ~I<http://www.w3.org/2001/XMLSchema#integer>}
|
|
|
|
iex> RDF.Literal.new!("foo", datatype: RDF.langString)
|
|
** (RDF.Literal.InvalidError) invalid RDF.Literal: %RDF.Literal{value: "foo", datatype: ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#langString>, language: nil}
|
|
|
|
"""
|
|
@spec new!(literal_value | t, map | keyword) :: t
|
|
def new!(value, opts \\ %{}) do
|
|
with %RDF.Literal{} = literal <- new(value, opts) do
|
|
if valid?(literal) do
|
|
literal
|
|
else
|
|
raise RDF.Literal.InvalidError, "invalid RDF.Literal: #{inspect literal}"
|
|
end
|
|
else
|
|
invalid ->
|
|
raise RDF.Literal.InvalidError, "invalid result of RDF.Literal.new: #{inspect invalid}"
|
|
end
|
|
end
|
|
|
|
@doc """
|
|
Returns the lexical representation of the given literal according to its datatype.
|
|
"""
|
|
@spec lexical(t) :: String.t
|
|
def lexical(%RDF.Literal{value: value, uncanonical_lexical: nil, datatype: id} = literal) do
|
|
case RDF.Datatype.get(id) do
|
|
nil -> to_string(value)
|
|
datatype -> datatype.lexical(literal)
|
|
end
|
|
end
|
|
|
|
def lexical(%RDF.Literal{uncanonical_lexical: lexical}), do: lexical
|
|
|
|
@doc """
|
|
Returns the given literal in its canonical lexical representation.
|
|
"""
|
|
@spec canonical(t) :: t
|
|
def canonical(%RDF.Literal{uncanonical_lexical: nil} = literal), do: literal
|
|
def canonical(%RDF.Literal{datatype: id} = literal) do
|
|
case RDF.Datatype.get(id) do
|
|
nil -> literal
|
|
datatype -> datatype.canonical(literal)
|
|
end
|
|
end
|
|
|
|
|
|
@doc """
|
|
Returns if the given literal is in its canonical lexical representation.
|
|
"""
|
|
@spec canonical?(t) :: boolean
|
|
def canonical?(%RDF.Literal{uncanonical_lexical: nil}), do: true
|
|
def canonical?(%RDF.Literal{} = _), do: false
|
|
|
|
|
|
@doc """
|
|
Returns if the value of the given literal is a valid according to its datatype.
|
|
"""
|
|
@spec valid?(t) :: boolean
|
|
def valid?(%RDF.Literal{datatype: id} = literal) do
|
|
case RDF.Datatype.get(id) do
|
|
nil -> true
|
|
datatype -> datatype.valid?(literal)
|
|
end
|
|
end
|
|
|
|
|
|
@doc """
|
|
Returns if a literal is a simple literal.
|
|
|
|
A simple literal has no datatype or language.
|
|
|
|
see <http://www.w3.org/TR/sparql11-query/#simple_literal>
|
|
"""
|
|
@spec simple?(t) :: boolean
|
|
def simple?(%RDF.Literal{datatype: @xsd_string}), do: true
|
|
def simple?(%RDF.Literal{} = _), do: false
|
|
|
|
|
|
@doc """
|
|
Returns if a literal is a language-tagged literal.
|
|
|
|
see <http://www.w3.org/TR/rdf-concepts/#dfn-plain-literal>
|
|
"""
|
|
@spec has_language?(t) :: boolean
|
|
def has_language?(%RDF.Literal{datatype: @lang_string}), do: true
|
|
def has_language?(%RDF.Literal{} = _), do: false
|
|
|
|
|
|
@doc """
|
|
Returns if a literal is a datatyped literal.
|
|
|
|
For historical reasons, this excludes `xsd:string` and `rdf:langString`.
|
|
|
|
see <http://www.w3.org/TR/rdf-concepts/#dfn-typed-literal>
|
|
"""
|
|
@spec has_datatype?(t) :: boolean
|
|
def has_datatype?(literal) do
|
|
not plain?(literal) and not has_language?(literal)
|
|
end
|
|
|
|
|
|
@doc """
|
|
Returns if a literal is a plain literal.
|
|
|
|
A plain literal may have a language, but may not have a datatype.
|
|
For all practical purposes, this includes `xsd:string` literals too.
|
|
|
|
see <http://www.w3.org/TR/rdf-concepts/#dfn-plain-literal>
|
|
"""
|
|
@spec plain?(t) :: boolean
|
|
def plain?(%RDF.Literal{datatype: datatype})
|
|
when datatype in @plain_types, do: true
|
|
def plain?(%RDF.Literal{} = _), do: false
|
|
|
|
@spec typed?(t) :: boolean
|
|
def typed?(literal), do: not plain?(literal)
|
|
|
|
|
|
@doc """
|
|
Checks if two `RDF.Literal`s are equal.
|
|
|
|
Non-RDF terms are tried to be coerced via `RDF.Term.coerce/1` before comparison.
|
|
|
|
Returns `nil` when the given arguments are not comparable as Literals.
|
|
|
|
see <https://www.w3.org/TR/rdf-concepts/#section-Literal-Equality>
|
|
"""
|
|
@spec equal_value?(t | IRI.t | any, t | IRI.t | any) :: boolean | nil
|
|
def equal_value?(left, right)
|
|
|
|
def equal_value?(%RDF.Literal{datatype: id1} = literal1, %RDF.Literal{datatype: id2} = literal2) do
|
|
case RDF.Datatype.get(id1) do
|
|
nil ->
|
|
if id1 == id2 do
|
|
literal1.value == literal2.value
|
|
end
|
|
datatype ->
|
|
datatype.equal_value?(literal1, literal2)
|
|
end
|
|
end
|
|
|
|
# TODO: Handle AnyURI in its own RDF.Datatype implementation
|
|
@xsd_any_uri "http://www.w3.org/2001/XMLSchema#anyURI"
|
|
|
|
def equal_value?(%RDF.Literal{datatype: %RDF.IRI{value: @xsd_any_uri}} = left, right),
|
|
do: RDF.IRI.equal_value?(left, right)
|
|
|
|
def equal_value?(left, %RDF.Literal{datatype: %RDF.IRI{value: @xsd_any_uri}} = right),
|
|
do: RDF.IRI.equal_value?(left, right)
|
|
|
|
def equal_value?(%RDF.Literal{} = left, right) when not is_nil(right) do
|
|
unless RDF.Term.term?(right) do
|
|
equal_value?(left, RDF.Term.coerce(right))
|
|
end
|
|
end
|
|
|
|
def equal_value?(_, _), do: nil
|
|
|
|
|
|
@doc """
|
|
Checks if the first of two `RDF.Literal`s is smaller then the other.
|
|
|
|
Returns `nil` when the given arguments are not comparable datatypes.
|
|
|
|
"""
|
|
@spec less_than?(t | any, t | any) :: boolean | nil
|
|
def less_than?(literal1, literal2) do
|
|
case compare(literal1, literal2) do
|
|
:lt -> true
|
|
nil -> nil
|
|
_ -> false
|
|
end
|
|
end
|
|
|
|
@doc """
|
|
Checks if the first of two `RDF.Literal`s is greater then the other.
|
|
|
|
Returns `nil` when the given arguments are not comparable datatypes.
|
|
|
|
"""
|
|
@spec greater_than?(t | any, t | any) :: boolean | nil
|
|
def greater_than?(literal1, literal2) do
|
|
case compare(literal1, literal2) do
|
|
:gt -> true
|
|
nil -> nil
|
|
_ -> false
|
|
end
|
|
end
|
|
|
|
|
|
@doc """
|
|
Compares two `RDF.Literal`s.
|
|
|
|
Returns `:gt` if first literal is greater than the second in terms of their datatype
|
|
and `:lt` for vice versa. If the two literals are equal `:eq` is returned.
|
|
For datatypes with only partial ordering `:indeterminate` is returned when the
|
|
order of the given literals is not defined.
|
|
|
|
Returns `nil` when the given arguments are not comparable datatypes.
|
|
|
|
"""
|
|
@spec compare(t | any, t | any) :: :eq | :lt | :gt | nil
|
|
def compare(left, right)
|
|
|
|
def compare(%RDF.Literal{datatype: id1} = literal1, %RDF.Literal{datatype: id2} = literal2) do
|
|
case RDF.Datatype.get(id1) do
|
|
nil ->
|
|
if id1 == id2 do
|
|
cond do
|
|
literal1.value == literal2.value -> :eq
|
|
literal1.value < literal2.value -> :lt
|
|
true -> :gt
|
|
end
|
|
end
|
|
|
|
datatype ->
|
|
datatype.compare(literal1, literal2)
|
|
end
|
|
end
|
|
|
|
def compare(_, _), do: nil
|
|
|
|
|
|
@doc """
|
|
Matches the string representation of the given value against a XPath and XQuery regular expression pattern.
|
|
|
|
The regular expression language is defined in _XQuery 1.0 and XPath 2.0 Functions and Operators_.
|
|
|
|
The `pattern` and the optional `flags` can be given as an Elixir string or as
|
|
`xsd:string` `RDF.Literal`s.
|
|
|
|
see <https://www.w3.org/TR/xpath-functions/#func-matches>
|
|
"""
|
|
@spec matches?(t | String.t, t | String.t, t | String.t) :: boolean
|
|
def matches?(value, pattern, flags \\ "") do
|
|
string = to_string(value)
|
|
case xpath_pattern(pattern, flags) do
|
|
{:regex, regex} ->
|
|
Regex.match?(regex, string)
|
|
|
|
{:q, pattern} ->
|
|
String.contains?(string, pattern)
|
|
|
|
{:qi, pattern} ->
|
|
string
|
|
|> String.downcase()
|
|
|> String.contains?(String.downcase(pattern))
|
|
|
|
_ ->
|
|
raise "Invalid XQuery regex pattern or flags"
|
|
end
|
|
end
|
|
|
|
@doc false
|
|
@spec xpath_pattern(t | String.t, t | String.t) ::
|
|
{:q | :qi, String.t} | {:regex, Regex.t} | {:error, any}
|
|
def xpath_pattern(pattern, flags)
|
|
|
|
def xpath_pattern(%RDF.Literal{datatype: @xsd_string} = pattern, flags),
|
|
do: xpath_pattern(pattern.value, flags)
|
|
|
|
def xpath_pattern(pattern, %RDF.Literal{datatype: @xsd_string} = flags),
|
|
do: xpath_pattern(pattern, flags.value)
|
|
|
|
def xpath_pattern(pattern, flags) when is_binary(pattern) and is_binary(flags) do
|
|
q_pattern(pattern, flags) || xpath_regex_pattern(pattern, flags)
|
|
end
|
|
|
|
defp q_pattern(pattern, flags) do
|
|
if String.contains?(flags, "q") and String.replace(flags, ~r/[qi]/, "") == "" do
|
|
{(if String.contains?(flags, "i"), do: :qi, else: :q), pattern}
|
|
end
|
|
end
|
|
|
|
defp xpath_regex_pattern(pattern, flags) do
|
|
with {:ok, regex} <-
|
|
pattern
|
|
|> convert_utf_escaping()
|
|
|> Regex.compile(xpath_regex_flags(flags)) do
|
|
{:regex, regex}
|
|
end
|
|
end
|
|
|
|
@doc false
|
|
@spec convert_utf_escaping(String.t) :: String.t
|
|
def convert_utf_escaping(string) do
|
|
require Integer
|
|
|
|
xpath_unicode_regex = ~r/(\\*)\\U([0-9]|[A-F]|[a-f]){2}(([0-9]|[A-F]|[a-f]){6})/
|
|
[first | possible_matches] =
|
|
Regex.split(xpath_unicode_regex, string, include_captures: true)
|
|
|
|
[first |
|
|
Enum.map_every(possible_matches, 2, fn possible_xpath_unicode ->
|
|
[_, escapes, _, codepoint, _] = Regex.run(xpath_unicode_regex, possible_xpath_unicode)
|
|
if escapes |> String.length() |> Integer.is_odd() do
|
|
"#{escapes}\\u{#{codepoint}}"
|
|
else
|
|
"\\" <> possible_xpath_unicode
|
|
end
|
|
end)
|
|
]
|
|
|> Enum.join()
|
|
end
|
|
|
|
defp xpath_regex_flags(flags) do
|
|
String.replace(flags, "q", "") <> "u"
|
|
end
|
|
end
|
|
|
|
defimpl String.Chars, for: RDF.Literal do
|
|
def to_string(literal) do
|
|
RDF.Literal.lexical(literal)
|
|
end
|
|
end
|