defmodule RDF.Vocabulary.Namespace do
  @moduledoc """
  An RDF vocabulary as a `RDF.Namespace`.

  `RDF.Vocabulary.Namespace` modules represent a RDF vocabulary as a `RDF.Namespace`.
  They can be defined with the `defvocab/2` macro of this module.

  RDF.ex comes with predefined modules for some fundamental vocabularies in
  the `RDF.NS` module.
  """

  alias RDF.Description
  alias RDF.Utils.ResourceClassifier

  import RDF.Utils, only: [downcase?: 1]

  @type t :: module

  # Note: We're not using :code.priv_dir/1 here on purpose, since vocabulary files should be
  # searched in the vocabs dir of the project in which the vocabulary namespace is defined.
  @vocabs_dir "priv/vocabs"

  defmacro __using__(_opts) do
    quote do
      import unquote(__MODULE__)
    end
  end

  @doc """
  Defines a `RDF.Namespace` module for a RDF vocabulary.

  ## Options

  - `:base_iri` (required) - the base IRI of the vocabulary; must be a string ending
    with `/`, `#` or `.`
  - `:file`, `:data` or `:terms` - the source of the vocabulary terms; they are checked
    in this order and one of them must be given
  - `:strict` - whether undefined terms are rejected (default: `true`)
  - `:ignore` - terms to be dropped from the vocabulary
  - `:alias` - a keyword list mapping alias names to the original terms
  - `:invalid_characters` - `:fail` (default), `:warn` or `:ignore`
  - `:case_violations` - `:warn` (default), `:fail` or `:ignore`; only applied when
    the terms were read from RDF data
  """
  defmacro defvocab(name, opts) do
    strict = strict?(opts)
    base_iri = base_iri!(opts)
    file = filename!(opts)

    {terms, data} =
      case source!(opts) do
        {:terms, terms} -> {terms, nil}
        {:data, data} -> {rdf_data_vocab_terms(data, base_iri), data}
      end

    if Mix.env() != :test do
      IO.puts("Compiling vocabulary namespace for #{base_iri}")
    end

    ignored_terms = ignored_terms!(opts)

    terms =
      terms
      |> term_mapping!(opts)
      |> Map.drop(MapSet.to_list(ignored_terms))
      |> validate_terms!()
      |> validate_characters!(opts)
      |> validate_case!(data, base_iri, opts)

    # Only lowercased terms get dedicated functions generated for them (see
    # define_vocab_terms/2 below).
    case_separated_terms = group_terms_by_case(terms)
    lowercased_terms = Map.get(case_separated_terms, :lowercased, %{})

    quote do
      vocabdoc = Module.delete_attribute(__MODULE__, :vocabdoc)

      defmodule unquote(name) do
        @moduledoc vocabdoc

        @behaviour Elixir.RDF.Namespace

        import Kernel,
          except: [
            min: 2,
            max: 2,
            div: 2,
            rem: 2,
            abs: 1,
            ceil: 1,
            floor: 1,
            elem: 2,
            send: 2,
            apply: 2,
            destructure: 2,
            get_and_update_in: 2,
            get_in: 2,
            pop_in: 2,
            put_in: 2,
            put_elem: 2,
            update_in: 2,
            raise: 2,
            reraise: 2,
            inspect: 2,
            struct: 1,
            struct: 2,
            use: 1,
            use: 2
          ]

        if unquote(file) do
          @external_resource unquote(file)
        end

        @base_iri unquote(base_iri)
        @spec __base_iri__ :: String.t()
        def __base_iri__, do: @base_iri

        @strict unquote(strict)
        @spec __strict__ :: boolean
        def __strict__, do: @strict

        @terms unquote(Macro.escape(terms))
        @impl Elixir.RDF.Namespace
        def __terms__, do: @terms |> Map.keys()

        @spec __term_aliases__ :: [atom]
        def __term_aliases__ do
          @terms
          |> Enum.filter(fn {_, term} -> term != true end)
          |> Enum.map(fn {alias, _} -> alias end)
        end

        @ignored_terms unquote(Macro.escape(ignored_terms))

        @doc """
        Returns all known IRIs of the vocabulary.
        """
        @spec __iris__ :: [Elixir.RDF.IRI.t()]
        def __iris__ do
          @terms
          |> Enum.map(fn
            {term, true} -> term_to_iri(@base_iri, term)
            {_alias, term} -> term_to_iri(@base_iri, term)
          end)
          |> Enum.uniq()
        end

        define_vocab_terms(unquote(lowercased_terms), unquote(base_iri))

        @impl Elixir.RDF.Namespace
        @dialyzer {:nowarn_function, __resolve_term__: 1}
        def __resolve_term__(term) do
          case @terms[term] do
            nil ->
              if @strict or MapSet.member?(@ignored_terms, term) do
                {:error,
                 %Elixir.RDF.Namespace.UndefinedTermError{
                   message: "undefined term #{term} in strict vocabulary #{__MODULE__}"
                 }}
              else
                {:ok, term_to_iri(@base_iri, term)}
              end

            true ->
              {:ok, term_to_iri(@base_iri, term)}

            original_term ->
              {:ok, term_to_iri(@base_iri, original_term)}
          end
        end

        if not @strict do
          def unquote(:"$handle_undefined_function")(term, []) do
            if MapSet.member?(@ignored_terms, term) do
              raise UndefinedFunctionError
            else
              term_to_iri(@base_iri, term)
            end
          end

          def unquote(:"$handle_undefined_function")(term, [subject | objects]) do
            # A single list argument is treated as the list of objects itself.
            objects =
              case objects do
                [objects] when is_list(objects) -> objects
                _ -> objects
              end

            if MapSet.member?(@ignored_terms, term) do
              raise UndefinedFunctionError
            else
              case subject do
                %Description{} -> subject
                _ -> Description.new(subject)
              end
              |> Description.add({term_to_iri(@base_iri, term), objects})
            end
          end
        end
      end
    end
  end

  @doc false
  defmacro define_vocab_terms(terms, base_iri) do
    terms
    |> Stream.filter(fn
      # Non-aliased terms conflicting with Elixir keywords can't become functions.
      {term, true} -> valid_term?(term)
      {_, _} -> true
    end)
    |> Stream.map(fn
      {term, true} -> {term, term}
      {term, original_term} -> {term, original_term}
    end)
    |> Enum.map(fn {term, iri_suffix} ->
      iri = term_to_iri(base_iri, iri_suffix)

      quote do
        @doc "<#{unquote(to_string(iri))}>"
        def unquote(term)(), do: unquote(Macro.escape(iri))

        @doc "`RDF.Description` builder for `#{unquote(term)}/0`"
        def unquote(term)(subject, object)

        def unquote(term)(%Description{} = subject, object) do
          Description.add(subject, {unquote(Macro.escape(iri)), object})
        end

        def unquote(term)(subject, object) do
          Description.new(subject, init: {unquote(Macro.escape(iri)), object})
        end

        # Is there a better way to support multiple objects via arguments?

        @doc false
        def unquote(term)(subject, o1, o2),
          do: unquote(term)(subject, [o1, o2])

        @doc false
        def unquote(term)(subject, o1, o2, o3),
          do: unquote(term)(subject, [o1, o2, o3])

        @doc false
        def unquote(term)(subject, o1, o2, o3, o4),
          do: unquote(term)(subject, [o1, o2, o3, o4])

        @doc false
        def unquote(term)(subject, o1, o2, o3, o4, o5),
          do: unquote(term)(subject, [o1, o2, o3, o4, o5])
      end
    end)
  end

  # Reads the :strict option; vocabularies are strict unless stated otherwise.
  defp strict?(opts), do: Keyword.get(opts, :strict, true)

  # Fetches the required :base_iri option and raises unless it is a string ending
  # with one of the accepted separators.
  defp base_iri!(opts) do
    base_iri = Keyword.fetch!(opts, :base_iri)

    if is_binary(base_iri) and String.ends_with?(base_iri, ~w[/ # .]) do
      base_iri
    else
      # The message lists every suffix accepted by the check above.
      raise RDF.Namespace.InvalidVocabBaseIRIError,
            "a base_iri without a trailing '/', '#' or '.' is invalid"
    end
  end

  # Determines where the terms come from. Returns `{:data, rdf_data}` for the
  # :file and :data options and `{:terms, atoms}` for the :terms option.
  defp source!(opts) do
    cond do
      Keyword.has_key?(opts, :file) ->
        {:data, filename!(opts) |> RDF.read_file!(base_iri: nil)}

      rdf_data = Keyword.get(opts, :data) ->
        {:data, raw_rdf_data(rdf_data)}

      terms = Keyword.get(opts, :terms) ->
        {:terms, terms_from_user_input!(terms)}

      true ->
        raise KeyError, key: ~w[terms data file], term: opts
    end
  end

  # Evaluates the quoted :terms option and converts every term to an atom.
  defp terms_from_user_input!(terms) do
    # TODO: find an alternative to Code.eval_quoted - We want to support that the terms can be given as sigils ...
    {terms, _} = Code.eval_quoted(terms, [], rdf_data_env())

    Enum.map(terms, fn
      term when is_atom(term) ->
        term

      term when is_binary(term) ->
        String.to_atom(term)

      term ->
        # inspect/1 instead of interpolation: terms like tuples or maps don't
        # implement String.Chars and would mask this error with a protocol error.
        raise RDF.Namespace.InvalidTermError,
              "'#{inspect(term)}' is not a valid vocabulary term"
    end)
  end

  # Already evaluated RDF data structures are passed through; anything else is
  # treated as quoted code and evaluated.
  defp raw_rdf_data(%Description{} = rdf_data), do: rdf_data
  defp raw_rdf_data(%RDF.Graph{} = rdf_data), do: rdf_data
  defp raw_rdf_data(%RDF.Dataset{} = rdf_data), do: rdf_data

  defp raw_rdf_data(rdf_data) do
    # TODO: find an alternative to Code.eval_quoted
    {rdf_data, _} = Code.eval_quoted(rdf_data, [], rdf_data_env())
    rdf_data
  end

  # Evaluates the quoted :ignore option and returns the terms as a MapSet of atoms.
  defp ignored_terms!(opts) do
    # TODO: find an alternative to Code.eval_quoted - We want to support that the terms can be given as sigils ...
    {terms, _} =
      opts
      |> Keyword.get(:ignore, [])
      |> Code.eval_quoted([], rdf_data_env())

    MapSet.new(terms, fn
      term when is_atom(term) -> term
      term when is_binary(term) -> String.to_atom(term)
      term -> raise RDF.Namespace.InvalidTermError, inspect(term)
    end)
  end

  # Builds the term map: every term maps to `true`, every alias maps to the
  # original term as a string. Raises on invalid alias definitions.
  defp term_mapping!(terms, opts) do
    terms =
      Map.new(terms, fn
        term when is_atom(term) -> {term, true}
        term -> {String.to_atom(term), true}
      end)

    opts
    |> Keyword.get(:alias, [])
    |> Enum.reduce(terms, fn {alias, original_term}, terms ->
      # Normalize once, so alias targets given as atoms work as well as strings.
      original_term = to_string(original_term)
      term = String.to_atom(original_term)

      cond do
        not valid_characters?(alias) ->
          raise RDF.Namespace.InvalidAliasError,
                "alias '#{alias}' contains invalid characters"

        Map.get(terms, alias) == true ->
          raise RDF.Namespace.InvalidAliasError,
                "alias '#{alias}' already defined"

        strict?(opts) and not Map.has_key?(terms, term) ->
          raise RDF.Namespace.InvalidAliasError,
                "term '#{original_term}' is not a term in this vocabulary"

        Map.get(terms, term, true) != true ->
          raise RDF.Namespace.InvalidAliasError,
                "'#{original_term}' is already an alias"

        true ->
          Map.put(terms, alias, original_term)
      end
    end)
  end

  # Returns the original terms (as atoms) for which an alias is defined.
  defp aliased_terms(terms) do
    terms
    |> Map.values()
    |> MapSet.new()
    |> MapSet.delete(true)
    |> Enum.map(&String.to_atom/1)
  end

  @invalid_terms MapSet.new(~w[
    and
    or
    xor
    in
    fn
    def
    defp
    defdelegate
    defexception
    defguard
    defguardp
    defimpl
    defmacro
    defmacrop
    defmodule
    defoverridable
    defprotocol
    defstruct
    function_exported?
    macro_exported?
    when
    if
    unless
    for
    case
    with
    quote
    unquote
    unquote_splicing
    alias
    import
    require
    super
    __aliases__
  ]a)

  @doc """
  Returns the set of terms which can not be used as vocabulary terms directly.
  """
  @spec invalid_terms :: MapSet.t(atom)
  def invalid_terms, do: @invalid_terms

  # Raises if any term without an alias is one of the invalid terms; returns the
  # terms unchanged otherwise.
  defp validate_terms!(terms) do
    aliased_terms = aliased_terms(terms)

    invalid_terms =
      for {term, _} <- terms, term not in aliased_terms and not valid_term?(term), do: term

    handle_invalid_terms!(invalid_terms)

    terms
  end

  defp valid_term?(term), do: term not in @invalid_terms

  defp handle_invalid_terms!([]), do: nil

  defp handle_invalid_terms!(invalid_terms) do
    raise RDF.Namespace.InvalidTermError, """
    The following terms can not be used, because they conflict with the Elixir semantics:

    - #{Enum.join(invalid_terms, "\n- ")}

    You have the following options:

    - define an alias with the :alias option on defvocab
    - ignore the resource with the :ignore option on defvocab
    """
  end

  # Checks the term names according to the :invalid_characters option.
  defp validate_characters!(terms, opts) do
    case Keyword.get(opts, :invalid_characters, :fail) do
      :ignore ->
        terms

      handling ->
        terms
        |> detect_invalid_characters()
        |> handle_invalid_characters(handling, terms)
    end
  end

  # Terms having an alias are exempt from the check.
  defp detect_invalid_characters(terms) do
    aliased_terms = aliased_terms(terms)

    for {term, _} <- terms, term not in aliased_terms and not valid_characters?(term), do: term
  end

  defp handle_invalid_characters([], _, terms), do: terms

  defp handle_invalid_characters(invalid_terms, :fail, _) do
    raise RDF.Namespace.InvalidTermError, """
    The following terms contain invalid characters:

    - #{Enum.join(invalid_terms, "\n- ")}

    You have the following options:

    - if you are in control of the vocabulary, consider renaming the resource
    - define an alias with the :alias option on defvocab
    - change the handling of invalid characters with the :invalid_characters option on defvocab
    - ignore the resource with the :ignore option on defvocab
    """
  end

  defp handle_invalid_characters(invalid_terms, :warn, terms) do
    Enum.each(invalid_terms, fn term ->
      IO.warn("'#{term}' is not valid term, since it contains invalid characters")
    end)

    terms
  end

  defp valid_characters?(term) when is_atom(term),
    do: valid_characters?(Atom.to_string(term))

  defp valid_characters?(term), do: Regex.match?(~r/^[a-zA-Z_]\w*$/, term)

  # Without RDF data there is nothing to classify the terms with.
  defp validate_case!(terms, nil, _, _), do: terms

  # Checks the casing of the terms according to the :case_violations option.
  defp validate_case!(terms, data, base_iri, opts) do
    case Keyword.get(opts, :case_violations, :warn) do
      :ignore ->
        terms

      handling ->
        terms
        |> detect_case_violations(data, base_iri)
        |> group_case_violations()
        |> handle_case_violations(handling, terms, base_iri, opts)
    end
  end

  # Returns the `{term, true | original_term}` entries with improper casing. Terms
  # starting with an underscore are skipped, as are terms having an alias.
  defp detect_case_violations(terms, data, base_iri) do
    aliased_terms = aliased_terms(terms)

    terms
    |> Enum.filter(fn {term, _} ->
      not (Atom.to_string(term) |> String.starts_with?("_"))
    end)
    |> Enum.filter(fn
      {term, true} ->
        # Yields nil (falsy) for aliased terms, which filters them out.
        if term not in aliased_terms do
          improper_case?(term, base_iri, Atom.to_string(term), data)
        end

      {term, original_term} ->
        improper_case?(term, base_iri, original_term, data)
    end)
  end

  # Properties must be lowercased; everything else (including resources which
  # could not be classified) must not be.
  defp improper_case?(term, base_iri, iri_suffix, data) do
    case ResourceClassifier.property?(term_to_iri(base_iri, iri_suffix), data) do
      true -> not downcase?(term)
      false -> downcase?(term)
      nil -> downcase?(term)
    end
  end

  defp group_case_violations(violations) do
    Enum.group_by(violations, fn
      {term, true} ->
        if downcase?(term), do: :lowercased_term, else: :capitalized_term

      {term, _original} ->
        if downcase?(term), do: :lowercased_alias, else: :capitalized_alias
    end)
  end

  defp handle_case_violations(%{} = violations, _, terms, _, _)
       when map_size(violations) == 0,
       do: terms

  defp handle_case_violations(violations, :fail, _, base_iri, _) do
    resource_name_violations = fn violations ->
      Enum.map_join(violations, "\n- ", fn {term, true} ->
        to_string(term_to_iri(base_iri, term))
      end)
    end

    alias_violations = fn violations ->
      Enum.map_join(violations, "\n- ", fn {term, original} ->
        "alias #{term} for #{term_to_iri(base_iri, original)}"
      end)
    end

    violation_error_lines =
      Enum.map_join(violations, fn
        {:capitalized_term, violations} ->
          """
          Terms for properties should be lowercased, but the following properties are
          capitalized:

          - #{resource_name_violations.(violations)}

          """

        {:lowercased_term, violations} ->
          """
          Terms for non-property resource should be capitalized, but the following
          non-properties are lowercased:

          - #{resource_name_violations.(violations)}

          """

        {:capitalized_alias, violations} ->
          """
          Terms for properties should be lowercased, but the following aliases for
          properties are capitalized:

          - #{alias_violations.(violations)}

          """

        {:lowercased_alias, violations} ->
          """
          Terms for non-property resource should be capitalized, but the following
          aliases for non-properties are lowercased:

          - #{alias_violations.(violations)}

          """
      end)

    raise RDF.Namespace.InvalidTermError, """
    Case violations detected

    #{violation_error_lines}
    You have the following options:

    - if you are in control of the vocabulary, consider renaming the resource
    - define a properly cased alias with the :alias option on defvocab
    - change the handling of case violations with the :case_violations option on defvocab
    - ignore the resource with the :ignore option on defvocab

    """
  end

  defp handle_case_violations(violations, :warn, terms, base_iri, _) do
    for {type, violations} <- violations,
        {term, original} <- violations do
      case_violation_warning(type, term, original, base_iri)
    end

    terms
  end

  defp case_violation_warning(:capitalized_term, term, _, base_iri) do
    IO.warn("'#{term_to_iri(base_iri, term)}' is a capitalized property")
  end

  defp case_violation_warning(:lowercased_term, term, _, base_iri) do
    IO.warn("'#{term_to_iri(base_iri, term)}' is a lowercased non-property resource")
  end

  defp case_violation_warning(:capitalized_alias, term, _, _) do
    IO.warn("capitalized alias '#{term}' for a property")
  end

  defp case_violation_warning(:lowercased_alias, term, _, _) do
    IO.warn("lowercased alias '#{term}' for a non-property resource")
  end

  # Resolves the :file option: the path as given is tried first, then the path
  # relative to the vocabs dir. Returns nil when no :file option is present.
  defp filename!(opts) do
    if filename = Keyword.get(opts, :file) do
      cond do
        File.exists?(filename) ->
          filename

        File.exists?(expanded_filename = Path.expand(filename, @vocabs_dir)) ->
          expanded_filename

        true ->
          raise File.Error, path: filename, action: "find", reason: :enoent
      end
    end
  end

  # The environment in which quoted user input is evaluated; it has the RDF
  # sigils imported.
  defp rdf_data_env do
    import RDF.Sigils, warn: false
    __ENV__
  end

  # Extracts the terms from the IRI resources of the given data: the part of the
  # IRI after the base IRI, as an atom.
  defp rdf_data_vocab_terms(data, base_iri) do
    data
    |> RDF.Data.resources()
    |> Stream.filter(fn
      %RDF.IRI{} -> true
      _ -> false
    end)
    |> Stream.map(&to_string/1)
    |> Stream.map(&strip_base_iri(&1, base_iri))
    |> Stream.filter(&vocab_term?/1)
    |> Enum.map(&String.to_atom/1)
  end

  # Splits the term map into maps under the keys :lowercased and :capitalized.
  defp group_terms_by_case(terms) do
    terms
    |> Enum.group_by(fn {term, _} ->
      if downcase?(term), do: :lowercased, else: :capitalized
    end)
    |> Map.new(fn {group, term_mapping} ->
      {group, Map.new(term_mapping)}
    end)
  end

  # Returns nil for IRIs outside of the vocabulary; vocab_term?/1 rejects those.
  defp strip_base_iri(iri, base_iri) do
    if String.starts_with?(iri, base_iri) do
      String.replace_prefix(iri, base_iri, "")
    end
  end

  defp vocab_term?(""), do: false

  defp vocab_term?(term) when is_binary(term) do
    not String.contains?(term, "/")
  end

  defp vocab_term?(_), do: false

  @doc false
  @spec term_to_iri(String.t(), String.t() | atom) :: RDF.IRI.t()
  def term_to_iri(base_iri, term) when is_atom(term),
    do: term_to_iri(base_iri, Atom.to_string(term))

  def term_to_iri(base_iri, term),
    do: RDF.iri(base_iri <> term)

  @doc false
  @spec vocabulary_namespace?(module) :: boolean
  def vocabulary_namespace?(name) do
    case Code.ensure_compiled(name) do
      {:module, name} -> function_exported?(name, :__base_iri__, 0)
      _ -> false
    end
  end
end