core: configurable handling of invalid characters in terms

This commit is contained in:
Marcel Otto 2017-05-26 21:22:26 +02:00
parent af751dd2bb
commit 44cb4b0815
2 changed files with 97 additions and 1 deletions

View file

@ -54,7 +54,7 @@ defmodule RDF.Vocabulary.Namespace do
defmacro defvocab(name, opts) do
base_uri = base_uri!(opts)
file = filename!(opts)
terms = terms!(opts) |> term_mapping!(opts)
terms = terms!(opts) |> term_mapping!(opts) |> validate_terms!(opts)
strict = strict?(opts)
case_separated_terms = group_terms_by_case(terms)
lowercased_terms = Map.get(case_separated_terms, :lowercased, %{})
@ -214,6 +214,10 @@ defmodule RDF.Vocabulary.Namespace do
|> Enum.reduce(terms, fn {alias, original_term}, terms ->
term = String.to_atom(original_term)
cond do
not valid_term?(alias) ->
raise RDF.Namespace.InvalidAliasError,
"alias '#{alias}' contains invalid characters"
Map.get(terms, alias) == true ->
raise RDF.Namespace.InvalidAliasError,
"alias '#{alias}' already defined"
@ -232,6 +236,59 @@ defmodule RDF.Vocabulary.Namespace do
end)
end
# Checks the given terms for invalid characters, according to the
# `:invalid_characters` option in `opts` (defaults to `:fail`).
#
# With `:ignore` the terms are returned untouched and no detection is run.
# For any other value the invalid terms are detected first and then passed,
# together with the configured handling, to `handle_invalid_terms/4`.
defp validate_terms!(terms, opts) do
  case Keyword.get(opts, :invalid_characters, :fail) do
    :ignore ->
      terms

    handling ->
      invalid_terms = detect_invalid_terms(terms, opts)
      handle_invalid_terms(invalid_terms, handling, terms, opts)
  end
end
# Returns the list of terms (the keys of the `terms` map) which contain
# invalid characters and are not covered by an alias.
#
# The values of `terms` are either `true` (a plain term) or the original term
# an alias key points to. An invalid term which is the target of an alias is
# not reported.
defp detect_invalid_terms(terms, _opts) do
  # Keep the aliased originals in a set, so the membership check below is a
  # set lookup instead of a linear scan per term.
  aliased =
    terms
    |> Map.values()
    |> Enum.reject(&(&1 == true))
    |> MapSet.new(&String.to_atom/1)

  for {term, _} <- terms,
      not valid_term?(term),
      not MapSet.member?(aliased, term),
      do: term
end
# Applies the configured handling to the detected `invalid_terms`.
#
# - no invalid terms: returns `terms` unchanged, whatever the handling is
# - `:fail`: raises `RDF.Namespace.InvalidTermError` listing all invalid terms
# - `:warn`: emits one compiler warning per invalid term and returns `terms`
# - any other handling: raises `ArgumentError` naming the unsupported value,
#   instead of failing with an opaque `FunctionClauseError`
defp handle_invalid_terms([], _, terms, _), do: terms

defp handle_invalid_terms(invalid_terms, :fail, _, _) do
  raise RDF.Namespace.InvalidTermError, """
  The following terms contain invalid characters:
  - #{Enum.join(invalid_terms, "\n- ")}
  You have the following options:
  - if you are in control of the vocabulary, consider renaming the resource
  - define an alias with the :alias option on defvocab
  - change the handling of invalid characters with the :invalid_characters option on defvocab
  """
end

defp handle_invalid_terms(invalid_terms, :warn, terms, _) do
  Enum.each(invalid_terms, fn term ->
    IO.warn("'#{term}' is not valid term, since it contains invalid characters")
  end)

  terms
end

defp handle_invalid_terms(_invalid_terms, handling, _, _) do
  raise ArgumentError,
        "invalid value for the :invalid_characters option: #{inspect(handling)}; " <>
          "expected :fail, :warn or :ignore"
end
# Checks whether the given term consists solely of valid characters: a letter
# or underscore, followed by any number of word characters.
#
# `nil` is treated as valid. Any other value is converted with `to_string/1`
# before being matched.
defp valid_term?(nil), do: true

defp valid_term?(term) do
  # `\A` and `\z` anchor to the absolute start and end of the string. `$`
  # would also match right before a trailing newline and let "foo\n" pass.
  Regex.match?(~r/\A[a-zA-Z_]\w*\z/, to_string(term))
end
def filename!(opts) do
if filename = Keyword.get(opts, :file) do
cond do
@ -290,6 +347,7 @@ defmodule RDF.Vocabulary.Namespace do
end
end
# A binary counts as a vocabulary term when it is non-empty and contains
# no "/".
defp vocab_term?(term) when is_binary(term) do
  term != "" and not String.contains?(term, "/")
end

View file

@ -116,6 +116,20 @@ defmodule RDF.Vocabulary.NamespaceTest do
end
end
# An alias is itself validated: the alias "foo-bar" contains a "-", so
# compiling the vocabulary must raise RDF.Namespace.InvalidAliasError, even
# though the aliased term "foo" is fine.
test "when the alias contains invalid characters term, an error is raised" do
assert_raise RDF.Namespace.InvalidAliasError, fn ->
# The module is defined inside the function so that the error raised during
# the expansion of defvocab can be caught by assert_raise.
defmodule BadNS12 do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/ex#",
terms: ~w[foo],
alias: ["foo-bar": "foo"]
end
end
end
test "when trying to map an already existing term, an error is raised" do
assert_raise RDF.Namespace.InvalidAliasError, fn ->
defmodule BadNS6 do
@ -203,6 +217,30 @@ defmodule RDF.Vocabulary.NamespaceTest do
end
end
# Tests for the :invalid_characters option of defvocab, which controls what
# happens with terms containing invalid characters that have no alias.
describe "invalid character handling" do
# No :invalid_characters option is given here, so the default handling
# (:fail) applies. The regex checks that both invalid terms are listed in the
# error message.
test "when a term contains unallowed characters and no alias defined, it fails when invalid_characters = :fail" do
assert_raise RDF.Namespace.InvalidTermError, ~r/Foo-bar.*foo-bar/s,
fn ->
defmodule BadNS10 do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/example#",
terms: ~w[Foo-bar foo-bar]
end
end
end
# This test has no explicit assertion: it passes as long as defining the
# vocabulary with the same invalid terms does not raise.
test "when a term contains unallowed characters it does not fail when invalid_characters = :ignore" do
defmodule BadNS11 do
use RDF.Vocabulary.Namespace
defvocab Example,
base_uri: "http://example.com/example#",
terms: ~w[Foo-bar foo-bar],
invalid_characters: :ignore
end
end
end
@tag skip: "TODO: Can we make RDF.uri(:foo) an undefined function call with guards or in another way?"
test "resolving an unqualified term raises an error" do