core: detect and handle case-violations of vocabulary terms

This commit is contained in:
Marcel Otto 2017-05-29 23:12:50 +02:00
parent 0581a45820
commit 19c84b7ea2
5 changed files with 411 additions and 66 deletions

View file

@ -121,8 +121,10 @@ defmodule RDF do
# temporary manual RDF vocab definitions # temporary manual RDF vocab definitions
# TODO: These should be defined as a vocabulary # TODO: These should be defined as a vocabulary
def langString do @rdf_type URI.parse("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString") def type, do: @rdf_type
end
@rdf_langString URI.parse("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString")
def langString, do: @rdf_langString
end end

View file

@ -17,7 +17,11 @@ defmodule RDF.NS do
""" """
defvocab RDF, defvocab RDF,
base_uri: "http://www.w3.org/1999/02/22-rdf-syntax-ns#", base_uri: "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
file: "rdf.nt" file: "rdf.nt",
alias: [
Nil: "nil",
LangString: "langString"
]
@vocabdoc """ @vocabdoc """
The RDFS vocabulary. The RDFS vocabulary.

View file

@ -0,0 +1,67 @@
defmodule RDF.Utils.ResourceClassifier do
  @moduledoc """
  Heuristics for classifying a resource based on the statements made about it.
  """

  alias RDF.Graph # TODO: use RDF.Data instead
  alias RDF.Description

  # Predicates whose presence on a resource indicates that the resource is a property.
  @property_properties ~w[
      http://www.w3.org/2000/01/rdf-schema#domain
      http://www.w3.org/2000/01/rdf-schema#range
      http://www.w3.org/2000/01/rdf-schema#subPropertyOf
      http://www.w3.org/2002/07/owl#equivalentProperty
      http://www.w3.org/2002/07/owl#propertyDisjointWith
    ]
    |> MapSet.new(&RDF.uri/1)

  # Classes whose instances are properties.
  @property_classes ~w[
      http://www.w3.org/1999/02/22-rdf-syntax-ns#Property
      http://www.w3.org/2000/01/rdf-schema#ContainerMembershipProperty
      http://www.w3.org/2002/07/owl#ObjectProperty
      http://www.w3.org/2002/07/owl#DatatypeProperty
      http://www.w3.org/2002/07/owl#AnnotationProperty
      http://www.w3.org/2002/07/owl#FunctionalProperty
      http://www.w3.org/2002/07/owl#InverseFunctionalProperty
      http://www.w3.org/2002/07/owl#SymmetricProperty
      http://www.w3.org/2002/07/owl#AsymmetricProperty
      http://www.w3.org/2002/07/owl#ReflexiveProperty
      http://www.w3.org/2002/07/owl#IrreflexiveProperty
      http://www.w3.org/2002/07/owl#TransitiveProperty
      http://www.w3.org/2002/07/owl#DeprecatedProperty
    ]
    |> MapSet.new(&RDF.uri/1)

  @doc """
  Determines if the given resource is an RDF property according to the
  statements about it in `data`.

  The result is three-valued:

  - `true` when the description of `resource` uses one of the
    property-indicating predicates (e.g. `rdfs:domain`) or has an `rdf:type`
    among the known property classes
  - `false` when the description has `rdf:type` values, but none of them is a
    known property class
  - whatever `RDF.Graph.description/2` returned when that is not a
    `RDF.Description` (presumably `nil` for an unknown resource), or `nil`
    when the description has no `rdf:type` at all
  """
  def property?(resource, data) do
    case Graph.description(data, resource) do
      %Description{} = description ->
        property_by_domain?(description) or
          property_by_rdf_type?(Description.get(description, RDF.type))

      no_description ->
        no_description
    end
    # || property_by_predicate_usage?(resource, data)
  end

  # Is any of the property-indicating predicates present in the description?
  defp property_by_domain?(description) do
    Enum.any?(@property_properties, &(description[&1]))
  end

  # Without any rdf:type statement nothing can be said about the resource.
  defp property_by_rdf_type?(nil), do: nil

  # Is at least one of the given types a known property class?
  defp property_by_rdf_type?(types) do
    Enum.any?(types, &MapSet.member?(@property_classes, &1))
  end

  # defp property_by_predicate_usage?(resource, data) do
  #   resource in Graph.predicates(data) || nil
  # end
end

View file

@ -52,10 +52,20 @@ defmodule RDF.Vocabulary.Namespace do
Defines a `RDF.Namespace` module for a RDF vocabulary. Defines a `RDF.Namespace` module for a RDF vocabulary.
""" """
defmacro defvocab(name, opts) do defmacro defvocab(name, opts) do
strict = strict?(opts)
base_uri = base_uri!(opts) base_uri = base_uri!(opts)
file = filename!(opts) file = filename!(opts)
terms = terms!(opts) |> term_mapping!(opts) |> validate_terms!(opts) {terms, data} =
strict = strict?(opts) case source!(opts) do
{:terms, terms} -> {terms, nil}
{:data, data} -> {rdf_data_vocab_terms(data, base_uri), data}
end #|> IO.inspect()
terms =
terms
|> term_mapping!(opts)
|> validate_terms!(opts)
|> validate_case!(data, base_uri, opts)
case_separated_terms = group_terms_by_case(terms) case_separated_terms = group_terms_by_case(terms)
lowercased_terms = Map.get(case_separated_terms, :lowercased, %{}) lowercased_terms = Map.get(case_separated_terms, :lowercased, %{})
@ -181,43 +191,38 @@ defmodule RDF.Vocabulary.Namespace do
end end
end end
def terms!(opts) do defp source!(opts) do
cond do cond do
Keyword.has_key?(opts, :file) -> Keyword.has_key?(opts, :file) -> {:data, filename!(opts) |> load_file}
filename!(opts) rdf_data = Keyword.get(opts, :data) -> {:data, raw_rdf_data(rdf_data)}
|> load_file terms = Keyword.get(opts, :terms) -> {:terms, terms_from_user_input!(terms)}
|> terms_from_rdf_data!(opts)
rdf_data = Keyword.get(opts, :data) ->
terms_from_rdf_data!(rdf_data, opts)
terms = Keyword.get(opts, :terms) ->
# TODO: find an alternative to Code.eval_quoted - We want to support that the terms can be given as sigils ...
{terms, _ } = Code.eval_quoted(terms, [], rdf_data_env())
terms
|> Enum.map(fn
term when is_atom(term) -> term
term when is_binary(term) -> String.to_atom(term)
term ->
raise RDF.Namespace.InvalidTermError,
"'#{term}' is not a valid vocabulary term"
end)
true -> true ->
raise KeyError, key: ~w[terms data file], term: opts raise KeyError, key: ~w[terms data file], term: opts
end end
end end
# TODO: support also RDF.Datasets ... defp terms_from_user_input!(terms) do
defp terms_from_rdf_data!(%RDF.Graph{} = rdf_data, opts) do # TODO: find an alternative to Code.eval_quoted - We want to support that the terms can be given as sigils ...
rdf_data_vocab_terms(rdf_data, Keyword.fetch!(opts, :base_uri)) {terms, _ } = Code.eval_quoted(terms, [], rdf_data_env())
Enum.map terms, fn
term when is_atom(term) -> term
term when is_binary(term) -> String.to_atom(term)
term ->
raise RDF.Namespace.InvalidTermError,
"'#{term}' is not a valid vocabulary term"
end
end end
defp terms_from_rdf_data!(rdf_data, opts) do # TODO: support also RDF.Datasets ...
defp raw_rdf_data(%RDF.Graph{} = rdf_data), do: rdf_data
defp raw_rdf_data(rdf_data) do
# TODO: find an alternative to Code.eval_quoted # TODO: find an alternative to Code.eval_quoted
{rdf_data, _} = Code.eval_quoted(rdf_data, [], rdf_data_env()) {rdf_data, _} = Code.eval_quoted(rdf_data, [], rdf_data_env())
terms_from_rdf_data!(rdf_data, opts) rdf_data
end end
def term_mapping!(terms, opts) do defp term_mapping!(terms, opts) do
terms = Map.new terms, fn terms = Map.new terms, fn
term when is_atom(term) -> {term, true} term when is_atom(term) -> {term, true}
term -> {String.to_atom(term), true} term -> {String.to_atom(term), true}
@ -248,33 +253,36 @@ defmodule RDF.Vocabulary.Namespace do
end) end)
end end
# Collects the original terms for which an alias is defined in the given
# term mapping, as a duplicate-free list of atoms.
#
# In the mapping an un-aliased term has the value `true`, while an alias has
# the name of the original term it stands for as its value.
defp aliased_terms(terms) do
  original_names =
    terms
    |> Map.values()
    |> MapSet.new()
    |> MapSet.delete(true)

  for name <- original_names, do: String.to_atom(name)
end
defp validate_terms!(terms, opts) do defp validate_terms!(terms, opts) do
if (handling = Keyword.get(opts, :invalid_characters, :fail)) == :ignore do if (handling = Keyword.get(opts, :invalid_characters, :fail)) == :ignore do
terms terms
else else
terms terms
|> detect_invalid_terms(opts) |> detect_invalid_terms
|> handle_invalid_terms(handling, terms, opts) |> handle_invalid_terms(handling, terms)
end end
end end
defp detect_invalid_terms(terms, _opts) do defp detect_invalid_terms(terms) do
aliased = aliased_terms = aliased_terms(terms)
terms Enum.filter_map terms,
|> Map.values fn {term, _} ->
|> MapSet.new not term in aliased_terms and not valid_term?(term)
|> MapSet.delete(true) end,
|> Enum.map(&String.to_atom/1) fn {term, _} -> term end
terms
|> Stream.filter(fn {term, _} ->
not valid_term?(term) and not term in aliased
end)
|> Enum.map(fn {term, _} -> term end)
end end
defp handle_invalid_terms([], _, terms, _), do: terms defp handle_invalid_terms([], _, terms), do: terms
defp handle_invalid_terms(invalid_terms, :fail, _, _) do defp handle_invalid_terms(invalid_terms, :fail, _) do
raise RDF.Namespace.InvalidTermError, """ raise RDF.Namespace.InvalidTermError, """
The following terms contain invalid characters: The following terms contain invalid characters:
@ -288,16 +296,155 @@ defmodule RDF.Vocabulary.Namespace do
""" """
end end
defp handle_invalid_terms(invalid_terms, :warn, terms, _) do defp handle_invalid_terms(invalid_terms, :warn, terms) do
Enum.each invalid_terms, fn term -> Enum.each invalid_terms, fn term ->
IO.warn "'#{term}' is not valid term, since it contains invalid characters" IO.warn "'#{term}' is not valid term, since it contains invalid characters"
end end
terms terms
end end
defp valid_term?(nil), do: true defp valid_term?(term) when is_atom(term),
defp valid_term?(term) do do: valid_term?(Atom.to_string(term))
Regex.match?(~r/^[a-zA-Z_]\w*$/, to_string(term)) defp valid_term?(term),
do: Regex.match?(~r/^[a-zA-Z_]\w*$/, term)
# Checks the casing of the terms against the kind of resource they denote and
# handles violations according to the `:case_violations` option (`:warn` by
# default). Returns the unchanged term mapping, unless the handling raises.

# Without RDF data there is nothing to classify the terms with.
defp validate_case!(terms, nil, _, _), do: terms

defp validate_case!(terms, data, base_uri, opts) do
  case Keyword.get(opts, :case_violations, :warn) do
    :ignore ->
      terms

    handling ->
      terms
      |> detect_case_violations(data, base_uri)
      |> group_case_violations()
      |> handle_case_violations(handling, terms, base_uri, opts)
  end
end
# Returns the `{term, original}` entries of the term mapping whose casing
# does not fit the kind of resource they denote: properties are expected to
# be lowercased, everything else to be capitalized.
defp detect_case_violations(terms, data, base_uri) do
  aliased_terms = aliased_terms(terms)

  Enum.filter terms, fn
    {term, true} ->
      # An original term for which an alias exists is not judged itself;
      # only its alias entry (the clause below) is.
      not(term in aliased_terms) and
        improper_case?(term, base_uri, Atom.to_string(term), data)

    {term, original_term} ->
      improper_case?(term, base_uri, original_term, data)
  end
end

# Is the casing of `term` a violation for the resource `base_uri <> uri_suffix`?
#
# `uri_suffix` is the name of the resource in the vocabulary, which differs
# from `term` when `term` is an alias.
defp improper_case?(term, base_uri, uri_suffix, data) do
  resource = term_to_uri(base_uri, uri_suffix)

  case RDF.Utils.ResourceClassifier.property?(resource, data) do
    # properties must be lowercased
    true ->
      not lowercase?(term)

    # non-properties and resources that can't be classified must be capitalized
    not_a_property when not_a_property in [false, nil] ->
      lowercase?(term)
  end
end
# Groups the violating `{term, original}` entries by the kind of violation:
# `:lowercased_term`, `:capitalized_term`, `:lowercased_alias` or
# `:capitalized_alias`.
defp group_case_violations(violations) do
  Enum.group_by(violations, fn {term, original} ->
    # un-aliased terms carry `true` instead of an original term name
    alias? = original !== true

    if lowercase?(term) do
      if alias?, do: :lowercased_alias, else: :lowercased_term
    else
      if alias?, do: :capitalized_alias, else: :capitalized_term
    end
  end)
end
# Applies the requested handling to the grouped case violations.
#
# Arguments: the violations grouped by kind (a map from a violation kind to
# the list of violating `{term, original}` entries), the handling mode, the
# complete term mapping, the base URI of the vocabulary and the defvocab
# options (unused by the clauses below).

# No violations: the term mapping is returned as is, whatever the handling.
defp handle_case_violations(%{} = violations, _, terms, _, _) when map_size(violations) == 0,
do: terms
# `:fail` handling: raises a `RDF.Namespace.InvalidTermError` whose message
# lists all violations, one section per violation kind.
defp handle_case_violations(violations, :fail, _, base_uri, _) do
# Renders un-aliased violations as a list of the full URIs of the resources.
resource_name_violations = fn violations ->
violations
|> Enum.map(fn {term, true} -> term_to_uri(base_uri, term) end)
|> Enum.map(&to_string/1)
|> Enum.join("\n- ")
end
# Renders alias violations as a list of "alias <term> for <URI of the original>".
alias_violations = fn violations ->
violations
|> Enum.map(fn {term, original} ->
"alias #{term} for #{term_to_uri(base_uri, original)}"
end)
|> Enum.join("\n- ")
end
# One message section per violation kind present in the map.
violation_error_lines =
violations
|> Enum.map(fn
{:capitalized_term, violations} ->
"""
Terms for properties should be lowercased, but the following properties are
capitalized:
- #{resource_name_violations.(violations)}
"""
{:lowercased_term, violations} ->
"""
Terms for non-property resource should be capitalized, but the following
non-properties are lowercased:
- #{resource_name_violations.(violations)}
"""
{:capitalized_alias, violations} ->
"""
Terms for properties should be lowercased, but the following aliases for
properties are capitalized:
- #{alias_violations.(violations)}
"""
{:lowercased_alias, violations} ->
"""
Terms for non-property resource should be capitalized, but the following
aliases for non-properties are lowercased:
- #{alias_violations.(violations)}
"""
end)
|> Enum.join
raise RDF.Namespace.InvalidTermError, """
Case violations detected
#{violation_error_lines}
You have the following options:
- if you are in control of the vocabulary, consider renaming the resource
- define a properly cased alias with the :alias option on defvocab
- change the handling of case violations with the :case_violations option on defvocab
"""
end
# `:warn` handling: emits one compiler warning per case violation and returns
# the term mapping unchanged.
defp handle_case_violations(violations, :warn, terms, base_uri, _) do
  Enum.each(violations, fn {type, entries} ->
    Enum.each(entries, fn {term, original} ->
      case_violation_warning(type, term, original, base_uri)
    end)
  end)

  terms
end
# Emits the warning for a single case violation of the given kind.
#
# Arguments: the violation kind, the violating term, the original term it is
# an alias for (unused by all clauses) and the base URI of the vocabulary.
# Violations of un-aliased terms are reported with the full URI of the
# resource; alias violations with the alias name only, so those clauses
# ignore the base URI.
defp case_violation_warning(:capitalized_term, term, _, base_uri) do
  IO.warn "'#{term_to_uri(base_uri, term)}' is a capitalized property"
end

defp case_violation_warning(:lowercased_term, term, _, base_uri) do
  IO.warn "'#{term_to_uri(base_uri, term)}' is a lowercased non-property resource"
end

defp case_violation_warning(:capitalized_alias, term, _, _) do
  IO.warn "capitalized alias '#{term}' for a property"
end

# The base URI is not part of the message, so it is ignored here as well;
# binding it to a name would trigger an "unused variable" compiler warning.
defp case_violation_warning(:lowercased_alias, term, _, _) do
  IO.warn "lowercased alias '#{term}' for a non-property resource"
end
@ -319,6 +466,7 @@ defmodule RDF.Vocabulary.Namespace do
end end
defp rdf_data_env do defp rdf_data_env do
import RDF.Sigils
__ENV__ __ENV__
end end
@ -330,7 +478,7 @@ defmodule RDF.Vocabulary.Namespace do
%URI{} -> true %URI{} -> true
_ -> false _ -> false
end) end)
|> Stream.map(&to_string/1) |> Stream.map(&URI.to_string/1)
|> Stream.map(&(strip_base_uri(&1, base_uri))) |> Stream.map(&(strip_base_uri(&1, base_uri)))
|> Stream.filter(&vocab_term?/1) |> Stream.filter(&vocab_term?/1)
|> Enum.map(&String.to_atom/1) |> Enum.map(&String.to_atom/1)
@ -366,8 +514,9 @@ defmodule RDF.Vocabulary.Namespace do
defp vocab_term?(_), do: false defp vocab_term?(_), do: false
@doc false @doc false
def term_to_uri(base_uri, term) do def term_to_uri(base_uri, term) when is_atom(term),
URI.parse(base_uri <> to_string(term)) do: term_to_uri(base_uri, Atom.to_string(term))
end def term_to_uri(base_uri, term),
do: URI.parse(base_uri <> term)
end end

View file

@ -3,8 +3,9 @@ defmodule RDF.Vocabulary.NamespaceTest do
doctest RDF.Vocabulary.Namespace doctest RDF.Vocabulary.Namespace
alias RDF.Description import RDF.Sigils
alias RDF.Description
defmodule TestNS do defmodule TestNS do
use RDF.Vocabulary.Namespace use RDF.Vocabulary.Namespace
@ -20,8 +21,8 @@ defmodule RDF.Vocabulary.NamespaceTest do
defvocab Example1, defvocab Example1,
base_uri: "http://example.com/example1#", base_uri: "http://example.com/example1#",
data: RDF.Graph.new([ data: RDF.Graph.new([
{"http://example.com/example1#foo", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "http://www.w3.org/1999/02/22-rdf-syntax-ns#Property"}, {~I<http://example.com/example1#foo>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#Property>},
{"http://example.com/example1#Bar", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "http://www.w3.org/2000/01/rdf-schema#Resource"} {~I<http://example.com/example1#Bar>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, ~I<http://www.w3.org/2000/01/rdf-schema#Resource>}
]) ])
defvocab Example2, defvocab Example2,
@ -104,6 +105,22 @@ defmodule RDF.Vocabulary.NamespaceTest do
end end
end end
# Terms named like the special atoms `nil`, `true` and `false` must still be
# usable as vocabulary terms and resolve to the respective URIs.
test "special terms" do
defmodule NSEdge do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/ex#",
terms: ~w[nil true false]
end
alias NSEdge.Example
assert Example.nil == ~I<http://example.com/ex#nil>
assert Example.true == ~I<http://example.com/ex#true>
assert Example.false == ~I<http://example.com/ex#false>
end
test "when the given file not found, an error is raised" do test "when the given file not found, an error is raised" do
assert_raise File.Error, fn -> assert_raise File.Error, fn ->
defmodule BadNS5 do defmodule BadNS5 do
@ -116,7 +133,8 @@ defmodule RDF.Vocabulary.NamespaceTest do
end end
end end
test "when the alias contains invalid characters term, an error is raised" do
test "when an alias contains invalid characters, an error is raised" do
assert_raise RDF.Namespace.InvalidAliasError, fn -> assert_raise RDF.Namespace.InvalidAliasError, fn ->
defmodule BadNS12 do defmodule BadNS12 do
use RDF.Vocabulary.Namespace use RDF.Vocabulary.Namespace
@ -129,7 +147,6 @@ defmodule RDF.Vocabulary.NamespaceTest do
end end
end end
test "when trying to map an already existing term, an error is raised" do test "when trying to map an already existing term, an error is raised" do
assert_raise RDF.Namespace.InvalidAliasError, fn -> assert_raise RDF.Namespace.InvalidAliasError, fn ->
defmodule BadNS6 do defmodule BadNS6 do
@ -245,14 +262,120 @@ defmodule RDF.Vocabulary.NamespaceTest do
end end
test "when a term contains unallowed characters it does not fail when invalid_characters = :ignore" do test "when a term contains unallowed characters it does not fail when invalid_characters = :ignore" do
defmodule BadNS11 do defmodule BadNS11 do
use RDF.Vocabulary.Namespace use RDF.Vocabulary.Namespace
defvocab Example, defvocab Example,
base_uri: "http://example.com/example#", base_uri: "http://example.com/example#",
terms: ~w[Foo-bar foo-bar], terms: ~w[Foo-bar foo-bar],
invalid_characters: :ignore invalid_characters: :ignore
end end
end end
end
# Tests for the :case_violations option of defvocab. Every vocabulary is
# defined from inline RDF data, in which a resource typed as rdf:Property is
# a property and a resource typed as rdfs:Resource is a non-property.
describe "case violation handling" do
# Passes when the module compiles: the capitalized property Foo and the
# lowercased non-property bar both get a properly cased alias.
test "aliases can fix case violations" do
defmodule NS23 do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/ex#",
case_violations: :fail,
data: RDF.Graph.new([
{~I<http://example.com/ex#Foo>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#Property>},
{~I<http://example.com/ex#bar>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, ~I<http://www.w3.org/2000/01/rdf-schema#Resource>}
]),
alias: [
foo: "Foo",
Bar: "bar",
]
end
end
# Passes when the module compiles with the :ignore handling.
test "when case_violations == :ignore is set, case violations are ignored" do
defmodule NS24 do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/ex#",
case_violations: :ignore,
data: RDF.Graph.new([
{~I<http://example.com/ex#Foo>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#Property>},
{~I<http://example.com/ex#bar>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, ~I<http://www.w3.org/2000/01/rdf-schema#Resource>}
]),
alias: [
foo: "Foo",
Bar: "bar",
]
end
end
# The error message must mention the URI of the violating resource.
test "a capitalized property without an alias and :case_violations == :fail, raises an error" do
assert_raise RDF.Namespace.InvalidTermError, ~r<http://example\.com/ex#Foo>s, fn ->
defmodule BadNS13 do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/ex#",
case_violations: :fail,
data: RDF.Graph.new([
{~I<http://example.com/ex#Foo>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#Property>},
])
end
end
end
# The error message must mention the URI of the violating resource.
test "a lowercased non-property without an alias and :case_violations == :fail, raises an error" do
assert_raise RDF.Namespace.InvalidTermError, ~r<http://example\.com/ex#bar>s, fn ->
defmodule BadNS14 do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/ex#",
case_violations: :fail,
data: RDF.Graph.new([
{~I<http://example.com/ex#bar>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, ~I<http://www.w3.org/2000/01/rdf-schema#Resource>}
])
end
end
end
# Here the resource itself is properly cased; the alias introduces the violation.
test "a capitalized alias for a property and :case_violations == :fail, raises an error" do
assert_raise RDF.Namespace.InvalidTermError, fn ->
defmodule BadNS15 do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/ex#",
case_violations: :fail,
data: RDF.Graph.new([
{~I<http://example.com/ex#foo>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#Property>},
]),
alias: [Foo: "foo"]
end
end
end
# Here the resource itself is properly cased; the alias introduces the violation.
test "a lowercased alias for a non-property and :case_violations == :fail, raises an error" do
assert_raise RDF.Namespace.InvalidTermError, fn ->
defmodule BadNS16 do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/ex#",
case_violations: :fail,
data: RDF.Graph.new([
{~I<http://example.com/ex#Bar>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, ~I<http://www.w3.org/2000/01/rdf-schema#Resource>}
]),
alias: [bar: "Bar"]
end
end
end
end
end end