add TLD validation support

This commit is contained in:
Egor Kislitsyn 2019-02-05 18:27:58 +07:00
parent 34e4e2f953
commit 35810b945e
3 changed files with 1606 additions and 4 deletions

View file

@ -28,13 +28,15 @@ defmodule AutoLinker.Parser do
# @invalid_url ~r/\.\.+/ # @invalid_url ~r/\.\.+/
@invalid_url ~r/(\.\.+)|(^(\d+\.){1,2}\d+$)/ @invalid_url ~r/(\.\.+)|(^(\d+\.){1,2}\d+$)/
@match_url ~r{^[\w\.-]+(?:\.[\w\.-]+)+[\w\-\._~:/?#[\]@!\$&'\(\)\*\+,;=.]+$} @match_url ~r{^[\w\.-]+(?<tld>\.[\w\.-]+)+[\w\-\._~:/?#[\]@!\$&'\(\)\*\+,;=.]+$}
@match_scheme ~r{^(?:http(s)?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~:/?#[\]@!\$&'\(\)\*\+,;=.]+$} @match_scheme ~r{^(?:http(s)?:\/\/)?[\w.-]+(?<tld>\.[\w\.-]+)+[\w\-\._~:/?#[\]@!\$&'\(\)\*\+,;=.]+$}
@match_phone ~r"((?:x\d{2,7})|(?:(?:\+?1\s?(?:[.-]\s?)?)?(?:\(\s?(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s?\)|(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s?(?:[.-]\s?)?)(?:[2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s?(?:[.-]\s?)?(?:[0-9]{4}))" @match_phone ~r"((?:x\d{2,7})|(?:(?:\+?1\s?(?:[.-]\s?)?)?(?:\(\s?(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s?\)|(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s?(?:[.-]\s?)?)(?:[2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s?(?:[.-]\s?)?(?:[0-9]{4}))"
@default_opts ~w(url)a @default_opts ~w(url)a
# Known top-level domains, read from priv/tlds.txt when the module is compiled
# (one entry per line, surrounding whitespace trimmed from the file as a whole).
# The file is registered as an external resource so that the compiler rebuilds
# this module when the list is edited; without it the compiled-in list would
# stay stale until something else forced a recompile.
@external_resource "./priv/tlds.txt"
@tlds "./priv/tlds.txt" |> File.read!() |> String.trim() |> String.split("\n")
def parse(text, opts \\ %{}) def parse(text, opts \\ %{})
def parse(text, list) when is_list(list), do: parse(text, Enum.into(list, %{})) def parse(text, list) when is_list(list), do: parse(text, Enum.into(list, %{}))
@ -183,7 +185,7 @@ defmodule AutoLinker.Parser do
if Regex.match?(@invalid_url, buffer) do if Regex.match?(@invalid_url, buffer) do
false false
else else
Regex.match?(@match_scheme, buffer) Regex.run(@match_scheme, buffer, capture: [:tld]) |> is_valid_tld?()
end end
end end
@ -191,10 +193,13 @@ defmodule AutoLinker.Parser do
if Regex.match?(@invalid_url, buffer) do if Regex.match?(@invalid_url, buffer) do
false false
else else
Regex.match?(@match_url, buffer) Regex.run(@match_url, buffer, capture: [:tld]) |> is_valid_tld?()
end end
end end
# Decides whether a captured TLD is one of the entries in @tlds.
#
# The argument is expected to be what `Regex.run/3` returns when called with
# `capture: [:tld]`: a single-element list whose string starts with a dot.
# The dot is stripped and the remainder is looked up in @tlds. Anything else
# (for example `nil` when the regex did not match) is treated as invalid.
def is_valid_tld?(captures) do
  case captures do
    ["." <> suffix] -> suffix in @tlds
    _other -> false
  end
end
@doc false @doc false
def match_phone(buffer) do def match_phone(buffer) do
case Regex.scan(@match_phone, buffer) do case Regex.scan(@match_phone, buffer) do

1543
priv/tlds.txt Normal file

File diff suppressed because it is too large Load diff

View file

@ -27,4 +27,58 @@ defmodule AutoLinkerTest do
"<a href=\"#\" class=\"phone-number\" data-phone=\"8888888888\">888 888-8888</a>" <> "<a href=\"#\" class=\"phone-number\" data-phone=\"8888888888\">888 888-8888</a>" <>
" <a href='a.com' class='auto-linker' target='_blank' rel='noopener noreferrer'>ab</a>" " <a href='a.com' class='auto-linker' target='_blank' rel='noopener noreferrer'>ab</a>"
end end
# Exercises TLD validation through AutoLinker.link/2: text whose host ends in
# an unknown TLD must come back unchanged, while a known TLD gets wrapped in
# an anchor tag.
describe "TLDs" do
  test "parse with scheme" do
    linked =
      "<a href='https://google.com' class='auto-linker' target='_blank' rel='noopener noreferrer'>google.com</a>"

    assert AutoLinker.link("https://google.com", scheme: true) == linked
  end

  test "only existing TLDs with scheme" do
    # Unknown TLD: the input is returned untouched.
    unknown_tld = "this url https://google.foobar.blah11blah/ has invalid TLD"
    assert AutoLinker.link(unknown_tld, scheme: true) == unknown_tld

    # Known TLD: the URL is linked and keeps its https scheme.
    known_tld = "this url https://google.foobar.com/ has valid TLD"

    linked =
      "this url <a href='https://google.foobar.com/' class='auto-linker' target='_blank' rel='noopener noreferrer'>google.foobar.com/</a> has valid TLD"

    assert AutoLinker.link(known_tld, scheme: true) == linked
  end

  test "only existing TLDs without scheme" do
    # Unknown TLD: the input is returned untouched.
    unknown_tld = "this url google.foobar.blah11blah/ has invalid TLD"
    assert AutoLinker.link(unknown_tld, scheme: false) == unknown_tld

    # Known TLD: the bare host is linked with an http:// href.
    known_tld = "this url google.foobar.com/ has valid TLD"

    linked =
      "this url <a href='http://google.foobar.com/' class='auto-linker' target='_blank' rel='noopener noreferrer'>google.foobar.com/</a> has valid TLD"

    assert AutoLinker.link(known_tld, scheme: false) == linked
  end

  test "only existing TLDs with and without scheme" do
    # With `scheme: true`, a URL that carries an explicit scheme is linked...
    with_scheme = "this url http://google.foobar.com/ has valid TLD"

    linked_with_scheme =
      "this url <a href='http://google.foobar.com/' class='auto-linker' target='_blank' rel='noopener noreferrer'>google.foobar.com/</a> has valid TLD"

    assert AutoLinker.link(with_scheme, scheme: true) == linked_with_scheme

    # ...and so is a bare host, which receives an http:// href.
    without_scheme = "this url google.foobar.com/ has valid TLD"

    linked_without_scheme =
      "this url <a href='http://google.foobar.com/' class='auto-linker' target='_blank' rel='noopener noreferrer'>google.foobar.com/</a> has valid TLD"

    assert AutoLinker.link(without_scheme, scheme: true) == linked_without_scheme
  end
end
end end