Prevent vocabulary terms conflicting with Elixir semantics

This commit is contained in:
Marcel Otto 2017-06-23 17:25:27 +02:00
parent 8ab2ab1e15
commit 206bc1cbd9
2 changed files with 282 additions and 54 deletions

View file

@ -5,7 +5,7 @@ defmodule RDF.Vocabulary.Namespace do
`RDF.Vocabulary.Namespace` modules represent a RDF vocabulary as a `RDF.Namespace`. `RDF.Vocabulary.Namespace` modules represent a RDF vocabulary as a `RDF.Namespace`.
They can be defined with the `defvocab/2` macro of this module. They can be defined with the `defvocab/2` macro of this module.
RDF.ex comes with predefined modules for some fundamentals vocabularies in RDF.ex comes with predefined modules for some fundamental vocabularies in
the `RDF.NS` module. the `RDF.NS` module.
Furthermore, the [rdf_vocab](https://hex.pm/packages/rdf_vocab) package Furthermore, the [rdf_vocab](https://hex.pm/packages/rdf_vocab) package
contains predefined modules for popular vocabularies. contains predefined modules for popular vocabularies.
@ -44,7 +44,8 @@ defmodule RDF.Vocabulary.Namespace do
terms terms
|> term_mapping!(opts) |> term_mapping!(opts)
|> Map.drop(MapSet.to_list(ignored_terms)) |> Map.drop(MapSet.to_list(ignored_terms))
|> validate_terms!(opts) |> validate_terms!
|> validate_characters!(opts)
|> validate_case!(data, base_uri, opts) |> validate_case!(data, base_uri, opts)
case_separated_terms = group_terms_by_case(terms) case_separated_terms = group_terms_by_case(terms)
lowercased_terms = Map.get(case_separated_terms, :lowercased, %{}) lowercased_terms = Map.get(case_separated_terms, :lowercased, %{})
@ -127,6 +128,10 @@ defmodule RDF.Vocabulary.Namespace do
@doc false @doc false
defmacro define_vocab_terms(terms, base_uri) do defmacro define_vocab_terms(terms, base_uri) do
terms terms
|> Stream.filter(fn
{term, true} -> valid_term?(term)
{term, original_term} -> true
end)
|> Stream.map(fn |> Stream.map(fn
{term, true} -> {term, term} {term, true} -> {term, term}
{term, original_term} -> {term, original_term} {term, original_term} -> {term, original_term}
@ -204,6 +209,21 @@ defmodule RDF.Vocabulary.Namespace do
end end
# Builds the set of terms listed under the `:ignore` option (empty when the
# option is absent).
#
# The option value is evaluated with `Code.eval_quoted/3` in the environment
# returned by `rdf_data_env/0`. Atoms are kept, binaries are converted to
# atoms, and any other value raises `RDF.Namespace.InvalidTermError`.
defp ignored_terms!(opts) do
  # TODO: find an alternative to Code.eval_quoted - We want to support that the terms can be given as sigils ...
  quoted_terms = Keyword.get(opts, :ignore, [])
  {evaluated_terms, _bindings} = Code.eval_quoted(quoted_terms, [], rdf_data_env())

  for term <- evaluated_terms, into: MapSet.new() do
    cond do
      is_atom(term) -> term
      is_binary(term) -> String.to_atom(term)
      true -> raise RDF.Namespace.InvalidTermError, inspect(term)
    end
  end
end
defp term_mapping!(terms, opts) do defp term_mapping!(terms, opts) do
terms = Map.new terms, fn terms = Map.new terms, fn
term when is_atom(term) -> {term, true} term when is_atom(term) -> {term, true}
@ -213,7 +233,7 @@ defmodule RDF.Vocabulary.Namespace do
|> Enum.reduce(terms, fn {alias, original_term}, terms -> |> Enum.reduce(terms, fn {alias, original_term}, terms ->
term = String.to_atom(original_term) term = String.to_atom(original_term)
cond do cond do
not valid_term?(alias) -> not valid_characters?(alias) ->
raise RDF.Namespace.InvalidAliasError, raise RDF.Namespace.InvalidAliasError,
"alias '#{alias}' contains invalid characters" "alias '#{alias}' contains invalid characters"
@ -243,28 +263,85 @@ defmodule RDF.Vocabulary.Namespace do
|> Enum.map(&String.to_atom/1) |> Enum.map(&String.to_atom/1)
end end
defp validate_terms!(terms, opts) do @invalid_terms MapSet.new ~w[
and
or
xor
in
fn
def
when
if
for
case
with
quote
unquote
unquote_splicing
alias
import
require
super
__aliases__
]a
# Exposes the `@invalid_terms` set, i.e. the terms rejected by `valid_term?/1`.
def invalid_terms do
  @invalid_terms
end
# Checks that no term collides with a name listed in `@invalid_terms`.
#
# Every key of `terms` that is neither reported by `aliased_terms/1` nor
# accepted by `valid_term?/1` is collected and passed to
# `handle_invalid_terms!/1`, which raises `RDF.Namespace.InvalidTermError`
# when the list is non-empty. Otherwise `terms` is returned unchanged, so the
# function can sit in a validation pipeline.
defp validate_terms!(terms) do
  aliased_terms = aliased_terms(terms)

  # A comprehension replaces `Enum.filter_map/3`, which is deprecated since
  # Elixir 1.5; the filter condition itself is unchanged.
  invalid_terms =
    for {term, _} <- terms,
        not term in aliased_terms and not valid_term?(term),
        do: term

  handle_invalid_terms!(invalid_terms)

  terms
end
# A term is valid as long as it is not a member of `@invalid_terms`.
defp valid_term?(term),
  do: not MapSet.member?(@invalid_terms, term)
# Reports terms that were found to conflict with Elixir semantics.
#
# Nothing to report: returns nil.
defp handle_invalid_terms!([]), do: nil
# At least one offending term: raises RDF.Namespace.InvalidTermError with a
# message listing every term (one "- " bullet per term) followed by the
# available remedies (:alias or :ignore on defvocab).
defp handle_invalid_terms!(invalid_terms) do
raise RDF.Namespace.InvalidTermError, """
The following terms can not be used, because they conflict with the Elixir semantics:
- #{Enum.join(invalid_terms, "\n- ")}
You have the following options:
- define an alias with the :alias option on defvocab
- ignore the resource with the :ignore option on defvocab
"""
end
defp validate_characters!(terms, opts) do
if (handling = Keyword.get(opts, :invalid_characters, :fail)) == :ignore do if (handling = Keyword.get(opts, :invalid_characters, :fail)) == :ignore do
terms terms
else else
terms terms
|> detect_invalid_terms |> detect_invalid_characters
|> handle_invalid_terms(handling, terms) |> handle_invalid_characters(handling, terms)
end end
end end
defp detect_invalid_terms(terms) do defp detect_invalid_characters(terms) do
aliased_terms = aliased_terms(terms) aliased_terms = aliased_terms(terms)
Enum.filter_map terms, Enum.filter_map terms,
fn {term, _} -> fn {term, _} ->
not term in aliased_terms and not valid_term?(term) not term in aliased_terms and not valid_characters?(term)
end, end,
fn {term, _} -> term end fn {term, _} -> term end
end end
defp handle_invalid_terms([], _, terms), do: terms defp handle_invalid_characters([], _, terms), do: terms
defp handle_invalid_terms(invalid_terms, :fail, _) do defp handle_invalid_characters(invalid_terms, :fail, _) do
raise RDF.Namespace.InvalidTermError, """ raise RDF.Namespace.InvalidTermError, """
The following terms contain invalid characters: The following terms contain invalid characters:
@ -275,19 +352,20 @@ defmodule RDF.Vocabulary.Namespace do
- if you are in control of the vocabulary, consider renaming the resource - if you are in control of the vocabulary, consider renaming the resource
- define an alias with the :alias option on defvocab - define an alias with the :alias option on defvocab
- change the handling of invalid characters with the :invalid_characters option on defvocab - change the handling of invalid characters with the :invalid_characters option on defvocab
- ignore the resource with the :ignore option on defvocab
""" """
end end
defp handle_invalid_terms(invalid_terms, :warn, terms) do defp handle_invalid_characters(invalid_terms, :warn, terms) do
Enum.each invalid_terms, fn term -> Enum.each invalid_terms, fn term ->
IO.warn "'#{term}' is not valid term, since it contains invalid characters" IO.warn "'#{term}' is not valid term, since it contains invalid characters"
end end
terms terms
end end
defp valid_term?(term) when is_atom(term), defp valid_characters?(term) when is_atom(term),
do: valid_term?(Atom.to_string(term)) do: valid_characters?(Atom.to_string(term))
defp valid_term?(term), defp valid_characters?(term),
do: Regex.match?(~r/^[a-zA-Z_]\w*$/, term) do: Regex.match?(~r/^[a-zA-Z_]\w*$/, term)
defp validate_case!(terms, nil, _, _), do: terms defp validate_case!(terms, nil, _, _), do: terms
@ -405,6 +483,7 @@ defmodule RDF.Vocabulary.Namespace do
- if you are in control of the vocabulary, consider renaming the resource - if you are in control of the vocabulary, consider renaming the resource
- define a properly cased alias with the :alias option on defvocab - define a properly cased alias with the :alias option on defvocab
- change the handling of case violations with the :case_violations option on defvocab - change the handling of case violations with the :case_violations option on defvocab
- ignore the resource with the :ignore option on defvocab
""" """
end end
@ -434,20 +513,6 @@ defmodule RDF.Vocabulary.Namespace do
end end
# Builds the set of terms listed under the `:ignore` option (empty when the
# option is absent).
#
# The option value is evaluated with `Code.eval_quoted/3` in the environment
# returned by `rdf_data_env/0`. Atoms are kept, binaries are converted to
# atoms, and any other value raises `RDF.Namespace.InvalidTermError`.
defp ignored_terms!(opts) do
  # TODO: find an alternative to Code.eval_quoted - We want to support that the terms can be given as sigils ...
  quoted_terms = Keyword.get(opts, :ignore, [])
  {evaluated_terms, _bindings} = Code.eval_quoted(quoted_terms, [], rdf_data_env())

  for term <- evaluated_terms, into: MapSet.new() do
    cond do
      is_atom(term) -> term
      is_binary(term) -> String.to_atom(term)
      true -> raise RDF.Namespace.InvalidTermError, inspect(term)
    end
  end
end
defp filename!(opts) do defp filename!(opts) do
if filename = Keyword.get(opts, :file) do if filename = Keyword.get(opts, :file) do
cond do cond do

View file

@ -141,22 +141,6 @@ defmodule RDF.Vocabulary.NamespaceTest do
end end
# Defines a vocabulary whose terms are the literals nil, true and false and
# checks that each one is callable as a function on the generated module and
# returns the IRI built from the base URI plus the term.
test "defvocab with special terms" do
defmodule NSofEdgeCases do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/ex#",
terms: ~w[nil true false]
end
alias NSofEdgeCases.Example
assert Example.nil == ~I<http://example.com/ex#nil>
assert Example.true == ~I<http://example.com/ex#true>
assert Example.false == ~I<http://example.com/ex#false>
end
describe "defvocab with bad aliases" do describe "defvocab with bad aliases" do
test "when an alias contains invalid characters, an error is raised" do test "when an alias contains invalid characters, an error is raised" do
assert_raise RDF.Namespace.InvalidAliasError, fn -> assert_raise RDF.Namespace.InvalidAliasError, fn ->
@ -212,10 +196,190 @@ defmodule RDF.Vocabulary.NamespaceTest do
end end
# Defines a vocabulary whose terms are Elixir keywords and special names that
# are NOT part of the invalid-terms list (nil, do, end, try, __MODULE__, ...)
# and checks that they can still be used as vocabulary terms.
test "defvocab with special terms" do
defmodule NSofEdgeCases do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/ex#",
terms: ~w[
nil
true
false
do
end
else
try
rescue
catch
after
not
cond
inbits
inlist
receive
__block__
__info__
__MODULE__
__FILE__
__DIR__
__ENV__
__CALLER__
]
end
alias NSofEdgeCases.Example
alias TestNS.EX
# Zero-arity form: each term resolves to the IRI of base URI + term.
assert Example.nil == ~I<http://example.com/ex#nil>
assert Example.true == ~I<http://example.com/ex#true>
assert Example.false == ~I<http://example.com/ex#false>
assert Example.do == ~I<http://example.com/ex#do>
assert Example.end == ~I<http://example.com/ex#end>
assert Example.else == ~I<http://example.com/ex#else>
assert Example.try == ~I<http://example.com/ex#try>
assert Example.rescue == ~I<http://example.com/ex#rescue>
assert Example.catch == ~I<http://example.com/ex#catch>
assert Example.after == ~I<http://example.com/ex#after>
assert Example.not == ~I<http://example.com/ex#not>
assert Example.cond == ~I<http://example.com/ex#cond>
assert Example.inbits == ~I<http://example.com/ex#inbits>
assert Example.inlist == ~I<http://example.com/ex#inlist>
assert Example.receive == ~I<http://example.com/ex#receive>
assert Example.__block__ == ~I<http://example.com/ex#__block__>
assert Example.__info__ == ~I<http://example.com/ex#__info__>
assert Example.__MODULE__ == ~I<http://example.com/ex#__MODULE__>
assert Example.__FILE__ == ~I<http://example.com/ex#__FILE__>
assert Example.__DIR__ == ~I<http://example.com/ex#__DIR__>
assert Example.__ENV__ == ~I<http://example.com/ex#__ENV__>
assert Example.__CALLER__ == ~I<http://example.com/ex#__CALLER__>
# Two-argument form (subject, object): expected to equal the description
# built by RDF.description/3 with the term as the predicate. Only the
# non-dunder terms are exercised here.
assert Example.nil( EX.S, 1) == RDF.description(EX.S, Example.nil , 1)
assert Example.true( EX.S, 1) == RDF.description(EX.S, Example.true , 1)
assert Example.false( EX.S, 1) == RDF.description(EX.S, Example.false , 1)
assert Example.do( EX.S, 1) == RDF.description(EX.S, Example.do , 1)
assert Example.end( EX.S, 1) == RDF.description(EX.S, Example.end , 1)
assert Example.else( EX.S, 1) == RDF.description(EX.S, Example.else , 1)
assert Example.try( EX.S, 1) == RDF.description(EX.S, Example.try , 1)
assert Example.rescue( EX.S, 1) == RDF.description(EX.S, Example.rescue , 1)
assert Example.catch( EX.S, 1) == RDF.description(EX.S, Example.catch , 1)
assert Example.after( EX.S, 1) == RDF.description(EX.S, Example.after , 1)
assert Example.not( EX.S, 1) == RDF.description(EX.S, Example.not , 1)
assert Example.cond( EX.S, 1) == RDF.description(EX.S, Example.cond , 1)
assert Example.inbits( EX.S, 1) == RDF.description(EX.S, Example.inbits , 1)
assert Example.inlist( EX.S, 1) == RDF.description(EX.S, Example.inlist , 1)
assert Example.receive(EX.S, 1) == RDF.description(EX.S, Example.receive , 1)
end
describe "defvocab with invalid terms" do
# Passing the complete invalid-terms set as the vocabulary's terms must raise
# RDF.Namespace.InvalidTermError. The regex only requires that the message
# mentions "unquote_splicing", one of the offending terms.
test "terms with a special meaning for Elixir cause a failure" do
assert_raise RDF.Namespace.InvalidTermError, ~r/unquote_splicing/s, fn ->
defmodule NSWithElixirTerms do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/example#",
terms: RDF.Vocabulary.Namespace.invalid_terms
end
end
end
# The vocabulary term itself ("foo") is harmless, but every alias name below
# is one of the invalid terms. Defining the vocabulary must therefore raise
# RDF.Namespace.InvalidTermError; the regex only requires that the message
# mentions "unquote_splicing".
test "alias terms with a special meaning for Elixir cause a failure" do
assert_raise RDF.Namespace.InvalidTermError, ~r/unquote_splicing/s, fn ->
defmodule NSWithElixirAliasTerms do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/example#",
terms: ~w[foo],
alias: [
and: "foo",
or: "foo",
xor: "foo",
in: "foo",
fn: "foo",
def: "foo",
when: "foo",
if: "foo",
for: "foo",
case: "foo",
with: "foo",
quote: "foo",
unquote: "foo",
unquote_splicing: "foo",
alias: "foo",
import: "foo",
require: "foo",
super: "foo",
__aliases__: "foo",
]
end
end
end
# The same invalid-terms set is given both as :terms and as :ignore. There is
# no explicit assertion: the test passes when defining the module does not
# raise.
test "terms with a special meaning for Elixir don't cause a failure when they are ignored" do
defmodule NSWithIgnoredElixirTerms do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/example#",
terms: RDF.Vocabulary.Namespace.invalid_terms,
ignore: RDF.Vocabulary.Namespace.invalid_terms
end
end
# Every invalid term gets an alias (the term with a trailing underscore, and
# _aliases_ for __aliases__). Defining the vocabulary must not raise, and
# each alias must resolve to the IRI of the ORIGINAL term.
test "terms with a special meaning for Elixir don't cause a failure when an alias is defined" do
defmodule NSWithAliasesForElixirTerms do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/example#",
terms: RDF.Vocabulary.Namespace.invalid_terms,
alias: [
and_: "and",
or_: "or",
xor_: "xor",
in_: "in",
fn_: "fn",
def_: "def",
when_: "when",
if_: "if",
for_: "for",
case_: "case",
with_: "with",
quote_: "quote",
unquote_: "unquote",
unquote_splicing_: "unquote_splicing",
alias_: "alias",
import_: "import",
require_: "require",
super_: "super",
_aliases_: "__aliases__"
]
end
alias NSWithAliasesForElixirTerms.Example
# Each alias maps back to the IRI of the term it stands in for.
assert Example.and_ == ~I<http://example.com/example#and>
assert Example.or_ == ~I<http://example.com/example#or>
assert Example.xor_ == ~I<http://example.com/example#xor>
assert Example.in_ == ~I<http://example.com/example#in>
assert Example.fn_ == ~I<http://example.com/example#fn>
assert Example.def_ == ~I<http://example.com/example#def>
assert Example.when_ == ~I<http://example.com/example#when>
assert Example.if_ == ~I<http://example.com/example#if>
assert Example.for_ == ~I<http://example.com/example#for>
assert Example.case_ == ~I<http://example.com/example#case>
assert Example.with_ == ~I<http://example.com/example#with>
assert Example.quote_ == ~I<http://example.com/example#quote>
assert Example.unquote_ == ~I<http://example.com/example#unquote>
assert Example.unquote_splicing_ == ~I<http://example.com/example#unquote_splicing>
assert Example.alias_ == ~I<http://example.com/example#alias>
assert Example.import_ == ~I<http://example.com/example#import>
assert Example.require_ == ~I<http://example.com/example#require>
assert Example.super_ == ~I<http://example.com/example#super>
assert Example._aliases_ == ~I<http://example.com/example#__aliases__>
end
end
describe "defvocab invalid character handling" do describe "defvocab invalid character handling" do
test "when a term contains unallowed characters and no alias defined, it fails when invalid_characters = :fail" do test "when a term contains unallowed characters and no alias defined, it fails when invalid_characters = :fail" do
assert_raise RDF.Namespace.InvalidTermError, ~r/Foo-bar.*foo-bar/s, assert_raise RDF.Namespace.InvalidTermError, ~r/Foo-bar.*foo-bar/s, fn ->
fn ->
defmodule NSWithInvalidTerms1 do defmodule NSWithInvalidTerms1 do
use RDF.Vocabulary.Namespace use RDF.Vocabulary.Namespace
defvocab Example, defvocab Example,
@ -274,7 +438,6 @@ defmodule RDF.Vocabulary.NamespaceTest do
end end
end end
test "a capitalized property without an alias and :case_violations == :fail, raises an error" do test "a capitalized property without an alias and :case_violations == :fail, raises an error" do
assert_raise RDF.Namespace.InvalidTermError, ~r<http://example\.com/ex#Foo>s, fn -> assert_raise RDF.Namespace.InvalidTermError, ~r<http://example\.com/ex#Foo>s, fn ->
defmodule NSWithBadCasedTerms3 do defmodule NSWithBadCasedTerms3 do
@ -485,7 +648,7 @@ defmodule RDF.Vocabulary.NamespaceTest do
end end
test "ignored terms with case violations do not raise anything" do test "ignored terms with case violations do not raise anything" do
defmodule IgnoredTermWithInvalidCharacters do defmodule IgnoredTermWithCaseViolations do
use RDF.Vocabulary.Namespace use RDF.Vocabulary.Namespace
defvocab Example, defvocab Example,
base_uri: "http://example.com/", base_uri: "http://example.com/",