core: detect and handle case-violations of vocabulary terms

This commit is contained in:
Marcel Otto 2017-05-29 23:12:50 +02:00
parent 0581a45820
commit 19c84b7ea2
5 changed files with 411 additions and 66 deletions

View file

@ -121,8 +121,10 @@ defmodule RDF do
# temporary manual RDF vocab definitions
# TODO: These should be defined as a vocabulary
def langString do
uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString")
end
@rdf_type URI.parse("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
def type, do: @rdf_type
@rdf_langString URI.parse("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString")
def langString, do: @rdf_langString
end

View file

@ -17,7 +17,11 @@ defmodule RDF.NS do
"""
defvocab RDF,
base_uri: "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
file: "rdf.nt"
file: "rdf.nt",
alias: [
Nil: "nil",
LangString: "langString"
]
@vocabdoc """
The RDFS vocabulary.

View file

@ -0,0 +1,67 @@
defmodule RDF.Utils.ResourceClassifier do
  @moduledoc """
  Heuristics for classifying the resources described in RDF data.

  Currently the only classification offered is whether a resource is an RDF
  property, see `property?/2`.
  """

  alias RDF.Graph # TODO: use RDF.Data instead
  alias RDF.Description

  # Predicates whose presence on a description marks the described resource
  # as a property.
  @property_properties ~w[
      http://www.w3.org/2000/01/rdf-schema#domain
      http://www.w3.org/2000/01/rdf-schema#range
      http://www.w3.org/2000/01/rdf-schema#subPropertyOf
      http://www.w3.org/2002/07/owl#equivalentProperty
      http://www.w3.org/2002/07/owl#propertyDisjointWith
    ]
    |> MapSet.new(&RDF.uri/1)

  # Classes which, when used as `rdf:type` of a resource, mark it as a property.
  @property_classes ~w[
      http://www.w3.org/1999/02/22-rdf-syntax-ns#Property
      http://www.w3.org/2000/01/rdf-schema#ContainerMembershipProperty
      http://www.w3.org/2002/07/owl#ObjectProperty
      http://www.w3.org/2002/07/owl#DatatypeProperty
      http://www.w3.org/2002/07/owl#AnnotationProperty
      http://www.w3.org/2002/07/owl#FunctionalProperty
      http://www.w3.org/2002/07/owl#InverseFunctionalProperty
      http://www.w3.org/2002/07/owl#SymmetricProperty
      http://www.w3.org/2002/07/owl#AsymmetricProperty
      http://www.w3.org/2002/07/owl#ReflexiveProperty
      http://www.w3.org/2002/07/owl#IrreflexiveProperty
      http://www.w3.org/2002/07/owl#TransitiveProperty
      http://www.w3.org/2002/07/owl#DeprecatedProperty
    ]
    |> MapSet.new(&RDF.uri/1)

  @doc """
  Determines if the given `resource` is an RDF property according to `data`.

  The resource counts as a property when its description in `data` either

  - uses one of the property-specific predicates (e.g. `rdfs:domain` or
    `rdfs:range`), or
  - has an `rdf:type` which is one of the known property classes
    (e.g. `rdf:Property` or `owl:ObjectProperty`).

  Returns `true` for a property and a falsy value (`false` or `nil`) when the
  description gives no such indication. If `RDF.Graph.description/2` does not
  return a `RDF.Description`, its result is returned unchanged.
  """
  def property?(resource, data) do
    case Graph.description(data, resource) do
      %Description{} = description ->
        property_by_domain?(description) or
          property_by_rdf_type?(Description.get(description, RDF.type))

      no_description ->
        no_description
    end
    # || property_by_predicate_usage?(resource, data)
  end

  # true if the description has a value for any of the property-specific predicates
  defp property_by_domain?(description) do
    Enum.any?(@property_properties, fn predicate ->
      description[predicate]
    end)
  end

  # nil when the resource has no rdf:type at all, otherwise whether at least
  # one of its types is a known property class
  defp property_by_rdf_type?(nil), do: nil
  defp property_by_rdf_type?(types) do
    declared_types = MapSet.new(types)
    not MapSet.disjoint?(declared_types, @property_classes)
  end

  # Not enabled yet: classification by usage of the resource as a predicate.
  # defp property_by_predicate_usage?(resource, data) do
  # resource in Graph.predicates(data) || nil
  # end
end

View file

@ -52,10 +52,20 @@ defmodule RDF.Vocabulary.Namespace do
Defines a `RDF.Namespace` module for a RDF vocabulary.
"""
defmacro defvocab(name, opts) do
strict = strict?(opts)
base_uri = base_uri!(opts)
file = filename!(opts)
terms = terms!(opts) |> term_mapping!(opts) |> validate_terms!(opts)
strict = strict?(opts)
{terms, data} =
case source!(opts) do
{:terms, terms} -> {terms, nil}
{:data, data} -> {rdf_data_vocab_terms(data, base_uri), data}
end #|> IO.inspect()
terms =
terms
|> term_mapping!(opts)
|> validate_terms!(opts)
|> validate_case!(data, base_uri, opts)
case_separated_terms = group_terms_by_case(terms)
lowercased_terms = Map.get(case_separated_terms, :lowercased, %{})
@ -181,43 +191,38 @@ defmodule RDF.Vocabulary.Namespace do
end
end
def terms!(opts) do
defp source!(opts) do
cond do
Keyword.has_key?(opts, :file) ->
filename!(opts)
|> load_file
|> terms_from_rdf_data!(opts)
rdf_data = Keyword.get(opts, :data) ->
terms_from_rdf_data!(rdf_data, opts)
terms = Keyword.get(opts, :terms) ->
# TODO: find an alternative to Code.eval_quoted - We want to support that the terms can be given as sigils ...
{terms, _ } = Code.eval_quoted(terms, [], rdf_data_env())
terms
|> Enum.map(fn
term when is_atom(term) -> term
term when is_binary(term) -> String.to_atom(term)
term ->
raise RDF.Namespace.InvalidTermError,
"'#{term}' is not a valid vocabulary term"
end)
Keyword.has_key?(opts, :file) -> {:data, filename!(opts) |> load_file}
rdf_data = Keyword.get(opts, :data) -> {:data, raw_rdf_data(rdf_data)}
terms = Keyword.get(opts, :terms) -> {:terms, terms_from_user_input!(terms)}
true ->
raise KeyError, key: ~w[terms data file], term: opts
end
end
# TODO: support also RDF.Datasets ...
defp terms_from_rdf_data!(%RDF.Graph{} = rdf_data, opts) do
rdf_data_vocab_terms(rdf_data, Keyword.fetch!(opts, :base_uri))
defp terms_from_user_input!(terms) do
# TODO: find an alternative to Code.eval_quoted - We want to support that the terms can be given as sigils ...
{terms, _ } = Code.eval_quoted(terms, [], rdf_data_env())
Enum.map terms, fn
term when is_atom(term) -> term
term when is_binary(term) -> String.to_atom(term)
term ->
raise RDF.Namespace.InvalidTermError,
"'#{term}' is not a valid vocabulary term"
end
end
defp terms_from_rdf_data!(rdf_data, opts) do
# TODO: support also RDF.Datasets ...
defp raw_rdf_data(%RDF.Graph{} = rdf_data), do: rdf_data
defp raw_rdf_data(rdf_data) do
# TODO: find an alternative to Code.eval_quoted
{rdf_data, _} = Code.eval_quoted(rdf_data, [], rdf_data_env())
terms_from_rdf_data!(rdf_data, opts)
rdf_data
end
def term_mapping!(terms, opts) do
defp term_mapping!(terms, opts) do
terms = Map.new terms, fn
term when is_atom(term) -> {term, true}
term -> {String.to_atom(term), true}
@ -248,33 +253,36 @@ defmodule RDF.Vocabulary.Namespace do
end)
end
# Returns, as atoms, the original names for which an alias is defined.
#
# `terms` maps every term to either `true` (a plain term) or to the name of
# the original term it is an alias for; only the latter values are collected,
# each of them once.
defp aliased_terms(terms) do
  aliased_originals =
    terms
    |> Map.values()
    |> MapSet.new()
    |> MapSet.delete(true)

  Enum.map(aliased_originals, &String.to_atom/1)
end
defp validate_terms!(terms, opts) do
if (handling = Keyword.get(opts, :invalid_characters, :fail)) == :ignore do
terms
else
terms
|> detect_invalid_terms(opts)
|> handle_invalid_terms(handling, terms, opts)
|> detect_invalid_terms
|> handle_invalid_terms(handling, terms)
end
end
defp detect_invalid_terms(terms, _opts) do
aliased =
terms
|> Map.values
|> MapSet.new
|> MapSet.delete(true)
|> Enum.map(&String.to_atom/1)
terms
|> Stream.filter(fn {term, _} ->
not valid_term?(term) and not term in aliased
end)
|> Enum.map(fn {term, _} -> term end)
defp detect_invalid_terms(terms) do
aliased_terms = aliased_terms(terms)
Enum.filter_map terms,
fn {term, _} ->
not term in aliased_terms and not valid_term?(term)
end,
fn {term, _} -> term end
end
defp handle_invalid_terms([], _, terms, _), do: terms
defp handle_invalid_terms([], _, terms), do: terms
defp handle_invalid_terms(invalid_terms, :fail, _, _) do
defp handle_invalid_terms(invalid_terms, :fail, _) do
raise RDF.Namespace.InvalidTermError, """
The following terms contain invalid characters:
@ -288,16 +296,155 @@ defmodule RDF.Vocabulary.Namespace do
"""
end
defp handle_invalid_terms(invalid_terms, :warn, terms, _) do
defp handle_invalid_terms(invalid_terms, :warn, terms) do
Enum.each invalid_terms, fn term ->
IO.warn "'#{term}' is not valid term, since it contains invalid characters"
end
terms
end
defp valid_term?(nil), do: true
defp valid_term?(term) do
Regex.match?(~r/^[a-zA-Z_]\w*$/, to_string(term))
defp valid_term?(term) when is_atom(term),
do: valid_term?(Atom.to_string(term))
defp valid_term?(term),
do: Regex.match?(~r/^[a-zA-Z_]\w*$/, term)
# Checks the casing of the terms against the RDF data they were extracted from
# and returns the terms unchanged, unless the violation handler raises.
#
# - without RDF data (`nil`) nothing can be classified, so nothing is checked
# - the `:case_violations` option selects the handling and defaults to `:warn`;
#   `:ignore` skips the detection altogether
defp validate_case!(terms, nil, _, _), do: terms
defp validate_case!(terms, data, base_uri, opts) do
  case Keyword.get(opts, :case_violations, :warn) do
    :ignore ->
      terms

    handling ->
      violations =
        terms
        |> detect_case_violations(data, base_uri)
        |> group_case_violations()

      handle_case_violations(violations, handling, terms, base_uri, opts)
  end
end
# Selects the `{term, true | original}` entries of `terms` whose casing does
# not fit the kind of resource they stand for.
#
# Plain terms for which an alias exists are skipped; in that case the alias
# entry is the one that gets checked, against the URI of the original term.
defp detect_case_violations(terms, data, base_uri) do
  already_aliased = aliased_terms(terms)

  Enum.filter(terms, fn
    {term, true} ->
      if term in already_aliased,
        do: nil,
        else: proper_case?(term, base_uri, Atom.to_string(term), data)

    {alias_term, original_term} ->
      proper_case?(alias_term, base_uri, original_term, data)
  end)
end
# NOTE: despite its name, this returns true when the casing of `term` is a
# *violation*: a property whose term is not lowercased, or a resource not
# recognized as a property (classifier result `false` or `nil`) whose term is
# lowercased. It is used as the filter predicate for detecting violations.
#
# `uri_suffix` is the name appended to `base_uri` to get the resource that is
# classified; for aliases it differs from `term`.
defp proper_case?(term, base_uri, uri_suffix, data) do
  resource = term_to_uri(base_uri, uri_suffix)

  case RDF.Utils.ResourceClassifier.property?(resource, data) do
    true ->
      not lowercase?(term)

    non_property when non_property in [false, nil] ->
      lowercase?(term)
  end
end
# Groups the violating `{term, true | original}` entries into a map keyed by
# the kind of violation:
#
#   :lowercased_term  / :capitalized_term  - plain terms (value is `true`)
#   :lowercased_alias / :capitalized_alias - aliases (value is the original name)
defp group_case_violations(violations) do
  Enum.group_by(violations, fn {term, target} ->
    plain_term = target == true
    lowercased = lowercase?(term)

    cond do
      plain_term and lowercased -> :lowercased_term
      plain_term -> :capitalized_term
      lowercased -> :lowercased_alias
      true -> :capitalized_alias
    end
  end)
end
# Applies the configured handling to the grouped case violations.
#
# `violations` is a map from the kind of violation (`:capitalized_term`,
# `:lowercased_term`, `:capitalized_alias`, `:lowercased_alias`) to the list of
# affected `{term, true | original}` entries.

# Nothing was detected: the terms are passed through, whatever the handling is.
defp handle_case_violations(%{} = violations, _, terms, _, _) when map_size(violations) == 0,
do: terms
# :fail - raises a RDF.Namespace.InvalidTermError listing all violations by kind.
defp handle_case_violations(violations, :fail, _, base_uri, _) do
# Renders plain-term violations as a "\n- "-separated list of their full URIs.
resource_name_violations = fn violations ->
violations
|> Enum.map(fn {term, true} -> term_to_uri(base_uri, term) end)
|> Enum.map(&to_string/1)
|> Enum.join("\n- ")
end
# Renders alias violations as a "\n- "-separated list naming the alias and the
# URI of the original term it stands for.
alias_violations = fn violations ->
violations
|> Enum.map(fn {term, original} ->
"alias #{term} for #{term_to_uri(base_uri, original)}"
end)
|> Enum.join("\n- ")
end
# One paragraph per kind of violation, concatenated into a single string.
violation_error_lines =
violations
|> Enum.map(fn
{:capitalized_term, violations} ->
"""
Terms for properties should be lowercased, but the following properties are
capitalized:
- #{resource_name_violations.(violations)}
"""
{:lowercased_term, violations} ->
"""
Terms for non-property resource should be capitalized, but the following
non-properties are lowercased:
- #{resource_name_violations.(violations)}
"""
{:capitalized_alias, violations} ->
"""
Terms for properties should be lowercased, but the following aliases for
properties are capitalized:
- #{alias_violations.(violations)}
"""
{:lowercased_alias, violations} ->
"""
Terms for non-property resource should be capitalized, but the following
aliases for non-properties are lowercased:
- #{alias_violations.(violations)}
"""
end)
|> Enum.join
# The message ends with the options the user has to resolve the violations.
raise RDF.Namespace.InvalidTermError, """
Case violations detected
#{violation_error_lines}
You have the following options:
- if you are in control of the vocabulary, consider renaming the resource
- define a properly cased alias with the :alias option on defvocab
- change the handling of case violations with the :case_violations option on defvocab
"""
end
# :warn - emits one warning per violating term and keeps all terms.
defp handle_case_violations(violations, :warn, terms, base_uri, _) do
  Enum.each(violations, fn {kind, entries} ->
    Enum.each(entries, fn {term, original} ->
      case_violation_warning(kind, term, original, base_uri)
    end)
  end)

  terms
end
# Emits a warning via `IO.warn/1` for a single case violation.
#
# Arguments: the kind of violation, the violating term, the original term in
# case of an alias (not used in any message) and the base URI of the
# vocabulary. Plain terms are reported with their full URI, aliases by their
# alias name only, so the base URI is ignored in the alias clauses.
defp case_violation_warning(:capitalized_term, term, _, base_uri) do
  IO.warn "'#{term_to_uri(base_uri, term)}' is a capitalized property"
end

defp case_violation_warning(:lowercased_term, term, _, base_uri) do
  IO.warn "'#{term_to_uri(base_uri, term)}' is a lowercased non-property resource"
end

defp case_violation_warning(:capitalized_alias, term, _, _) do
  IO.warn "capitalized alias '#{term}' for a property"
end

# The base URI was bound here but never used, which triggered an
# "unused variable" compiler warning.
defp case_violation_warning(:lowercased_alias, term, _, _) do
  IO.warn "lowercased alias '#{term}' for a non-property resource"
end
@ -319,6 +466,7 @@ defmodule RDF.Vocabulary.Namespace do
end
defp rdf_data_env do
import RDF.Sigils
__ENV__
end
@ -330,7 +478,7 @@ defmodule RDF.Vocabulary.Namespace do
%URI{} -> true
_ -> false
end)
|> Stream.map(&to_string/1)
|> Stream.map(&URI.to_string/1)
|> Stream.map(&(strip_base_uri(&1, base_uri)))
|> Stream.filter(&vocab_term?/1)
|> Enum.map(&String.to_atom/1)
@ -366,8 +514,9 @@ defmodule RDF.Vocabulary.Namespace do
defp vocab_term?(_), do: false
@doc false
def term_to_uri(base_uri, term) do
URI.parse(base_uri <> to_string(term))
end
def term_to_uri(base_uri, term) when is_atom(term),
do: term_to_uri(base_uri, Atom.to_string(term))
def term_to_uri(base_uri, term),
do: URI.parse(base_uri <> term)
end

View file

@ -3,8 +3,9 @@ defmodule RDF.Vocabulary.NamespaceTest do
doctest RDF.Vocabulary.Namespace
alias RDF.Description
import RDF.Sigils
alias RDF.Description
defmodule TestNS do
use RDF.Vocabulary.Namespace
@ -20,8 +21,8 @@ defmodule RDF.Vocabulary.NamespaceTest do
defvocab Example1,
base_uri: "http://example.com/example1#",
data: RDF.Graph.new([
{"http://example.com/example1#foo", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "http://www.w3.org/1999/02/22-rdf-syntax-ns#Property"},
{"http://example.com/example1#Bar", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "http://www.w3.org/2000/01/rdf-schema#Resource"}
{~I<http://example.com/example1#foo>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#Property>},
{~I<http://example.com/example1#Bar>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, ~I<http://www.w3.org/2000/01/rdf-schema#Resource>}
])
defvocab Example2,
@ -104,6 +105,22 @@ defmodule RDF.Vocabulary.NamespaceTest do
end
end
# `nil`, `true` and `false` are Elixir literals; this verifies they can still
# be declared as vocabulary terms and resolve to URIs under the base URI.
test "special terms" do
defmodule NSEdge do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/ex#",
terms: ~w[nil true false]
end
alias NSEdge.Example
assert Example.nil == ~I<http://example.com/ex#nil>
assert Example.true == ~I<http://example.com/ex#true>
assert Example.false == ~I<http://example.com/ex#false>
end
test "when the given file not found, an error is raised" do
assert_raise File.Error, fn ->
defmodule BadNS5 do
@ -116,7 +133,8 @@ defmodule RDF.Vocabulary.NamespaceTest do
end
end
test "when the alias contains invalid characters term, an error is raised" do
test "when an alias contains invalid characters, an error is raised" do
assert_raise RDF.Namespace.InvalidAliasError, fn ->
defmodule BadNS12 do
use RDF.Vocabulary.Namespace
@ -129,7 +147,6 @@ defmodule RDF.Vocabulary.NamespaceTest do
end
end
test "when trying to map an already existing term, an error is raised" do
assert_raise RDF.Namespace.InvalidAliasError, fn ->
defmodule BadNS6 do
@ -245,14 +262,120 @@ defmodule RDF.Vocabulary.NamespaceTest do
end
test "when a term contains unallowed characters it does not fail when invalid_characters = :ignore" do
defmodule BadNS11 do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/example#",
terms: ~w[Foo-bar foo-bar],
invalid_characters: :ignore
end
defmodule BadNS11 do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/example#",
terms: ~w[Foo-bar foo-bar],
invalid_characters: :ignore
end
end
end
describe "case violation handling" do
# ex:Foo is a capitalized property and ex:bar a lowercased non-property; both
# get a properly cased alias. The test passes if defining the vocabulary does
# not raise although case_violations is set to :fail.
test "aliases can fix case violations" do
defmodule NS23 do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/ex#",
case_violations: :fail,
data: RDF.Graph.new([
{~I<http://example.com/ex#Foo>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#Property>},
{~I<http://example.com/ex#bar>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, ~I<http://www.w3.org/2000/01/rdf-schema#Resource>}
]),
alias: [
foo: "Foo",
Bar: "bar",
]
end
end
# Same data and aliases as in the alias test, but with case_violations: :ignore;
# the test passes if defining the vocabulary does not raise.
test "when case_violations == :ignore is set, case violations are ignored" do
defmodule NS24 do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/ex#",
case_violations: :ignore,
data: RDF.Graph.new([
{~I<http://example.com/ex#Foo>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#Property>},
{~I<http://example.com/ex#bar>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, ~I<http://www.w3.org/2000/01/rdf-schema#Resource>}
]),
alias: [
foo: "Foo",
Bar: "bar",
]
end
end
# ex:Foo is typed as rdf:Property but capitalized and has no alias; the raised
# error message must mention the full URI of the offending resource.
test "a capitalized property without an alias and :case_violations == :fail, raises an error" do
assert_raise RDF.Namespace.InvalidTermError, ~r<http://example\.com/ex#Foo>s, fn ->
defmodule BadNS13 do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/ex#",
case_violations: :fail,
data: RDF.Graph.new([
{~I<http://example.com/ex#Foo>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#Property>},
])
end
end
end
# ex:bar is typed as rdfs:Resource (not a property class) but lowercased and
# has no alias; the raised error message must mention its full URI.
test "a lowercased non-property without an alias and :case_violations == :fail, raises an error" do
assert_raise RDF.Namespace.InvalidTermError, ~r<http://example\.com/ex#bar>s, fn ->
defmodule BadNS14 do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/ex#",
case_violations: :fail,
data: RDF.Graph.new([
{~I<http://example.com/ex#bar>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, ~I<http://www.w3.org/2000/01/rdf-schema#Resource>}
])
end
end
end
# Here the original term ex:foo is properly cased for a property; the
# violation is introduced by the capitalized alias `Foo`.
test "a capitalized alias for a property and :case_violations == :fail, raises an error" do
assert_raise RDF.Namespace.InvalidTermError, fn ->
defmodule BadNS15 do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/ex#",
case_violations: :fail,
data: RDF.Graph.new([
{~I<http://example.com/ex#foo>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#Property>},
]),
alias: [Foo: "foo"]
end
end
end
# Here the original term ex:Bar is properly cased for a non-property; the
# violation is introduced by the lowercased alias `bar`.
test "a lowercased alias for a non-property and :case_violations == :fail, raises an error" do
assert_raise RDF.Namespace.InvalidTermError, fn ->
defmodule BadNS16 do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/ex#",
case_violations: :fail,
data: RDF.Graph.new([
{~I<http://example.com/ex#Bar>, ~I<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>, ~I<http://www.w3.org/2000/01/rdf-schema#Resource>}
]),
alias: [bar: "Bar"]
end
end
end
end