add TLD validation support
This commit is contained in:
parent
34e4e2f953
commit
35810b945e
3 changed files with 1606 additions and 4 deletions
|
@ -28,13 +28,15 @@ defmodule AutoLinker.Parser do
|
||||||
# @invalid_url ~r/\.\.+/
|
# @invalid_url ~r/\.\.+/
|
||||||
@invalid_url ~r/(\.\.+)|(^(\d+\.){1,2}\d+$)/
|
@invalid_url ~r/(\.\.+)|(^(\d+\.){1,2}\d+$)/
|
||||||
|
|
||||||
@match_url ~r{^[\w\.-]+(?:\.[\w\.-]+)+[\w\-\._~:/?#[\]@!\$&'\(\)\*\+,;=.]+$}
|
# The named `tld` group must capture the last host label in full, because callers
# look that capture up in the TLD list. A repeated `(?<tld>\.[\w\.-]+)+` followed by
# a mandatory trailing class had to surrender its final character on URLs without a
# path ("example.org" captured ".or"). Here the label ends on a word character that
# no other word character may follow, and everything after the host is optional.
@match_url ~r{^[\w\.-]+(?<tld>\.[\w-]*\w)(?!\w)[\w\-\._~:/?#[\]@!\$&'\(\)\*\+,;=.]*$}
|
||||||
@match_scheme ~r{^(?:http(s)?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~:/?#[\]@!\$&'\(\)\*\+,;=.]+$}
|
# Same shape as the scheme-less URL pattern, with an optional http:// or https://
# prefix. The named `tld` group must capture the last host label in full: a repeated
# `(?<tld>\.[\w\.-]+)+` followed by a mandatory trailing class lost its final
# character on URLs without a path ("https://example.org" captured ".or"). The label
# now ends on a word character that no other word character may follow, and the
# part after the host is optional.
@match_scheme ~r{^(?:http(s)?:\/\/)?[\w.-]+(?<tld>\.[\w-]*\w)(?!\w)[\w\-\._~:/?#[\]@!\$&'\(\)\*\+,;=.]*$}
|
||||||
|
|
||||||
@match_phone ~r"((?:x\d{2,7})|(?:(?:\+?1\s?(?:[.-]\s?)?)?(?:\(\s?(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s?\)|(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s?(?:[.-]\s?)?)(?:[2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s?(?:[.-]\s?)?(?:[0-9]{4}))"
|
@match_phone ~r"((?:x\d{2,7})|(?:(?:\+?1\s?(?:[.-]\s?)?)?(?:\(\s?(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s?\)|(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s?(?:[.-]\s?)?)(?:[2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s?(?:[.-]\s?)?(?:[0-9]{4}))"
|
||||||
|
|
||||||
@default_opts ~w(url)a
|
@default_opts ~w(url)a
|
||||||
|
|
||||||
|
# The TLD list is read once, at compile time. Declaring it as an external resource
# lets build tools recompile this module when the file changes.
@external_resource "./priv/tlds.txt"
# Split on CRLF as well as LF and trim every entry, so a file saved with Windows
# line endings cannot produce entries such as "com\r"; blank lines are dropped.
@tlds "./priv/tlds.txt"
      |> File.read!()
      |> String.split(["\r\n", "\n"], trim: true)
      |> Enum.map(&String.trim/1)
      |> Enum.reject(&(&1 == ""))
|
||||||
|
|
||||||
def parse(text, opts \\ %{})
|
def parse(text, opts \\ %{})
|
||||||
def parse(text, list) when is_list(list), do: parse(text, Enum.into(list, %{}))
|
def parse(text, list) when is_list(list), do: parse(text, Enum.into(list, %{}))
|
||||||
|
|
||||||
|
@ -183,7 +185,7 @@ defmodule AutoLinker.Parser do
|
||||||
if Regex.match?(@invalid_url, buffer) do
|
if Regex.match?(@invalid_url, buffer) do
|
||||||
false
|
false
|
||||||
else
|
else
|
||||||
Regex.match?(@match_scheme, buffer)
|
Regex.run(@match_scheme, buffer, capture: [:tld]) |> is_valid_tld?()
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -191,10 +193,13 @@ defmodule AutoLinker.Parser do
|
||||||
if Regex.match?(@invalid_url, buffer) do
|
if Regex.match?(@invalid_url, buffer) do
|
||||||
false
|
false
|
||||||
else
|
else
|
||||||
Regex.match?(@match_url, buffer)
|
Regex.run(@match_url, buffer, capture: [:tld]) |> is_valid_tld?()
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Checks a `tld` capture against the compiled-in TLD list.
#
# The expected argument is a one-element list holding the capture with its leading
# dot (e.g. [".com"]), which is what `Regex.run/3` with `capture: [:tld]` produces
# on a match. The dot is stripped by the pattern match before the lookup.
def is_valid_tld?(["." <> tld]) do
  Enum.member?(@tlds, tld)
end

# Anything else - including the `nil` that `Regex.run/3` returns when the pattern
# does not match - is not a valid TLD.
def is_valid_tld?(_no_capture) do
  false
end
|
||||||
|
|
||||||
@doc false
|
@doc false
|
||||||
def match_phone(buffer) do
|
def match_phone(buffer) do
|
||||||
case Regex.scan(@match_phone, buffer) do
|
case Regex.scan(@match_phone, buffer) do
|
||||||
|
|
1543
priv/tlds.txt
Normal file
1543
priv/tlds.txt
Normal file
File diff suppressed because it is too large
Load diff
|
@ -27,4 +27,58 @@ defmodule AutoLinkerTest do
|
||||||
"<a href=\"#\" class=\"phone-number\" data-phone=\"8888888888\">888 888-8888</a>" <>
|
"<a href=\"#\" class=\"phone-number\" data-phone=\"8888888888\">888 888-8888</a>" <>
|
||||||
" <a href='a.com' class='auto-linker' target='_blank' rel='noopener noreferrer'>ab</a>"
|
" <a href='a.com' class='auto-linker' target='_blank' rel='noopener noreferrer'>ab</a>"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
describe "TLDs" do
  # A URL with an explicit scheme and a known TLD is turned into a link.
  test "parse with scheme" do
    input = "https://google.com"

    linked =
      "<a href='https://google.com' class='auto-linker' target='_blank' rel='noopener noreferrer'>google.com</a>"

    assert AutoLinker.link(input, scheme: true) == linked
  end

  # With `scheme: true`, an unknown TLD leaves the text untouched while a known
  # TLD is linked.
  test "only existing TLDs with scheme" do
    unlinked = "this url https://google.foobar.blah11blah/ has invalid TLD"
    assert AutoLinker.link(unlinked, scheme: true) == unlinked

    input = "this url https://google.foobar.com/ has valid TLD"

    linked =
      "this url <a href='https://google.foobar.com/' class='auto-linker' target='_blank' rel='noopener noreferrer'>google.foobar.com/</a> has valid TLD"

    assert AutoLinker.link(input, scheme: true) == linked
  end

  # Same checks for scheme-less URLs with `scheme: false`; the generated href
  # is expected to carry an http:// prefix.
  test "only existing TLDs without scheme" do
    unlinked = "this url google.foobar.blah11blah/ has invalid TLD"
    assert AutoLinker.link(unlinked, scheme: false) == unlinked

    input = "this url google.foobar.com/ has valid TLD"

    linked =
      "this url <a href='http://google.foobar.com/' class='auto-linker' target='_blank' rel='noopener noreferrer'>google.foobar.com/</a> has valid TLD"

    assert AutoLinker.link(input, scheme: false) == linked
  end

  # With `scheme: true`, both the URL that spells out its scheme and the one that
  # omits it are expected to produce the same link.
  test "only existing TLDs with and without scheme" do
    linked =
      "this url <a href='http://google.foobar.com/' class='auto-linker' target='_blank' rel='noopener noreferrer'>google.foobar.com/</a> has valid TLD"

    with_scheme = "this url http://google.foobar.com/ has valid TLD"
    assert AutoLinker.link(with_scheme, scheme: true) == linked

    without_scheme = "this url google.foobar.com/ has valid TLD"
    assert AutoLinker.link(without_scheme, scheme: true) == linked
  end
end
|
||||||
end
|
end
|
||||||
|
|
Reference in a new issue