197 lines
5.8 KiB
Elixir
197 lines
5.8 KiB
Elixir
defmodule RDF.IRI do
|
|
@moduledoc """
|
|
A structure for IRIs.
|
|
|
|
This structure just wraps a plain IRI string and doesn't bother with the
|
|
components of the IRI, since in the context of RDF there are usually very many
|
|
IRIs and parsing them isn't needed in most cases. For these reasons we don't
|
|
use Elixirs built-in `URI` structure, because it would be unnecessary
|
|
expensive in terms of performance and memory.
|
|
|
|
The component parts can always be retrieved with the `RDF.IRI.parse/1`
|
|
function, which returns Elixirs built-in `URI` structure. Note, that `URI`
|
|
doesn't escape Unicode characters by default, so it's a suitable structure for
|
|
IRIs.
|
|
|
|
see <https://tools.ietf.org/html/rfc3987>
|
|
"""
|
|
|
|
defstruct [:value]
|
|
|
|
alias RDF.Namespace
|
|
|
|
@type t :: module
|
|
|
|
# see https://tools.ietf.org/html/rfc3986#appendix-B
|
|
@scheme_regex Regex.recompile!(~r/^([a-z][a-z0-9\+\-\.]*):/i)
|
|
|
|
|
|
@doc """
|
|
Creates a `RDF.IRI`.
|
|
"""
|
|
def new(iri)
|
|
def new(iri) when is_binary(iri), do: %RDF.IRI{value: iri}
|
|
def new(qname) when is_atom(qname) and not qname in [nil, true, false],
|
|
do: Namespace.resolve_term(qname)
|
|
def new(%URI{} = uri), do: uri |> URI.to_string |> new
|
|
def new(%RDF.IRI{} = iri), do: iri
|
|
|
|
@doc """
|
|
Creates a `RDF.IRI`, but checks if the given IRI is valid.
|
|
|
|
If the given IRI is not valid a `RDF.IRI.InvalidError` is raised.
|
|
|
|
see `valid?/1`
|
|
"""
|
|
def new!(iri)
|
|
def new!(iri) when is_binary(iri), do: iri |> valid!() |> new()
|
|
def new!(qname) when is_atom(qname) and not qname in [nil, true, false],
|
|
do: new(qname) # since terms of a namespace are already validated
|
|
def new!(%URI{} = uri), do: uri |> valid!() |> new()
|
|
def new!(%RDF.IRI{} = iri), do: valid!(iri)
|
|
|
|
|
|
@doc """
|
|
Returns the given value unchanged if it's a valid IRI, otherwise raises an exception.
|
|
|
|
## Examples
|
|
|
|
iex> RDF.IRI.valid!("http://www.example.com/foo")
|
|
"http://www.example.com/foo"
|
|
iex> RDF.IRI.valid!(RDF.IRI.new("http://www.example.com/foo"))
|
|
RDF.IRI.new("http://www.example.com/foo")
|
|
iex> RDF.IRI.valid!("not an iri")
|
|
** (RDF.IRI.InvalidError) Invalid IRI: "not an iri"
|
|
"""
|
|
def valid!(iri) do
|
|
if not valid?(iri), do: raise RDF.IRI.InvalidError, "Invalid IRI: #{inspect iri}"
|
|
iri
|
|
end
|
|
|
|
|
|
@doc """
|
|
Checks if the given IRI is valid.
|
|
|
|
Note: This currently checks only if the given IRI is absolute.
|
|
|
|
## Examples
|
|
|
|
iex> RDF.IRI.valid?("http://www.example.com/foo")
|
|
true
|
|
iex> RDF.IRI.valid?("not an iri")
|
|
false
|
|
"""
|
|
def valid?(iri), do: absolute?(iri) # TODO: Provide a more elaborate validation
|
|
|
|
|
|
@doc """
|
|
Checks if the given value is an absolute IRI.
|
|
|
|
An absolute IRI is defined in [RFC3987](http://www.ietf.org/rfc/rfc3987.txt)
|
|
containing a scheme along with a path and optional query and fragment segments.
|
|
"""
|
|
def absolute?(iri)
|
|
|
|
def absolute?(value) when is_binary(value), do: not is_nil(scheme(value))
|
|
def absolute?(%RDF.IRI{value: value}), do: absolute?(value)
|
|
def absolute?(%URI{scheme: nil}), do: false
|
|
def absolute?(%URI{scheme: _}), do: true
|
|
def absolute?(qname) when is_atom(qname) and not qname in [nil, true, false] do
|
|
try do
|
|
qname |> Namespace.resolve_term |> absolute?()
|
|
rescue
|
|
_ -> false
|
|
end
|
|
end
|
|
def absolute?(_), do: false
|
|
|
|
|
|
@doc """
|
|
Resolves a relative IRI against a base IRI.
|
|
|
|
as specified in [section 5.1 Establishing a Base URI of RFC3986](http://tools.ietf.org/html/rfc3986#section-5.1).
|
|
Only the basic algorithm in [section 5.2 of RFC3986](http://tools.ietf.org/html/rfc3986#section-5.2)
|
|
is used; neither Syntax-Based Normalization nor Scheme-Based Normalization are performed.
|
|
|
|
Characters additionally allowed in IRI references are treated in the same way that unreserved
|
|
characters are treated in URI references, per [section 6.5 of RFC3987](http://tools.ietf.org/html/rfc3987#section-6.5)
|
|
"""
|
|
def absolute(iri, base) do
|
|
if absolute?(iri) do
|
|
new(iri)
|
|
else
|
|
merge(base, iri)
|
|
end
|
|
end
|
|
|
|
|
|
@doc """
|
|
Merges two IRIs.
|
|
|
|
This function merges two IRIs as per
|
|
[RFC 3986, section 5.2](https://tools.ietf.org/html/rfc3986#section-5.2).
|
|
"""
|
|
def merge(base, rel) do
|
|
base
|
|
|> parse()
|
|
|> URI.merge(parse(rel))
|
|
|> empty_fragment_shim(rel)
|
|
|> new()
|
|
end
|
|
|
|
|
|
@doc false
|
|
# shim for https://github.com/elixir-lang/elixir/pull/6419
|
|
def empty_fragment_shim(_, %URI{} = uri), do: uri
|
|
def empty_fragment_shim(uri, %RDF.IRI{value: value}),
|
|
do: empty_fragment_shim(uri, value)
|
|
def empty_fragment_shim(uri, original) do
|
|
if String.ends_with?(original, "#") do
|
|
%URI{uri | fragment: ""}
|
|
else
|
|
uri
|
|
end
|
|
end
|
|
|
|
|
|
@doc """
|
|
Returns the scheme of the given IRI
|
|
|
|
If the given string is not a valid absolute IRI, `nil` is returned.
|
|
|
|
## Examples
|
|
|
|
iex> RDF.IRI.scheme("http://www.example.com/foo")
|
|
"http"
|
|
iex> RDF.IRI.scheme("not an iri")
|
|
nil
|
|
"""
|
|
def scheme(iri)
|
|
def scheme(%RDF.IRI{value: value}), do: scheme(value)
|
|
def scheme(%URI{scheme: scheme}), do: scheme
|
|
def scheme(qname) when is_atom(qname), do: Namespace.resolve_term(qname) |> scheme()
|
|
def scheme(iri) when is_binary(iri) do
|
|
with [_, scheme] <- Regex.run(@scheme_regex, iri) do
|
|
scheme
|
|
end
|
|
end
|
|
|
|
|
|
@doc """
|
|
Parses an IRI into its components and returns them as an `URI` struct.
|
|
"""
|
|
def parse(iri)
|
|
def parse(iri) when is_binary(iri), do: URI.parse(iri) |> empty_fragment_shim(iri)
|
|
def parse(qname) when is_atom(qname) and not qname in [nil, true, false],
|
|
do: Namespace.resolve_term(qname) |> parse()
|
|
def parse(%RDF.IRI{value: value}), do: URI.parse(value) |> empty_fragment_shim(value)
|
|
def parse(%URI{} = uri), do: uri
|
|
|
|
|
|
defimpl String.Chars do
|
|
def to_string(%RDF.IRI{value: value}) do
|
|
value
|
|
end
|
|
end
|
|
|
|
end
|