add email, @mentions, #hashtags and extra schemes

This commit is contained in:
Egor Kislitsyn 2019-02-08 13:45:11 +07:00
parent 8ec0c74b3a
commit 479acfb82d
3 changed files with 403 additions and 7 deletions

View file

@ -112,6 +112,64 @@ defmodule AutoLinker.Builder do
}>'
end
# Builds the anchor markup for a mention such as "@name".
#
# The target URL is `opts[:mention_prefix]` followed by the name without its
# leading "@", so `opts[:mention_prefix]` has to be a binary. The attribute
# list is threaded through build_attrs/4 for :rel, :target, :class and :scheme,
# in that order, before being rendered by format_mention/3.
def create_mention_link("@" <> name, _buffer, opts) do
  url = opts[:mention_prefix] <> name

  with_rel = build_attrs([], url, opts, :rel)
  with_target = build_attrs(with_rel, url, opts, :target)
  with_class = build_attrs(with_target, url, opts, :class)
  with_scheme = build_attrs(with_class, url, opts, :scheme)

  format_mention(with_scheme, name, opts)
end
# Builds the anchor markup for a hashtag.
#
# `tag` is the tag text without the leading "#". The target URL is
# `opts[:hashtag_prefix]` followed by the tag, so `opts[:hashtag_prefix]` has
# to be a binary. Attributes are collected through build_attrs/4 for :rel,
# :target, :class and :scheme, in that order, then rendered by format_hashtag/3.
def create_hashtag_link(tag, _buffer, opts) do
  url = opts[:hashtag_prefix] <> tag

  with_rel = build_attrs([], url, opts, :rel)
  with_target = build_attrs(with_rel, url, opts, :target)
  with_class = build_attrs(with_target, url, opts, :class)
  with_scheme = build_attrs(with_class, url, opts, :scheme)

  format_hashtag(with_scheme, tag, opts)
end
# Builds a `mailto:` anchor for `email`; only the :class attribute is collected
# through build_attrs/4 before rendering with format_email/3.
def create_email_link(email, opts) do
  attrs = build_attrs([], email, opts, :class)
  format_email(attrs, email, opts)
end
# Builds an anchor for a non-http URI (see the callers in the parser); only the
# :class attribute is collected through build_attrs/4 before rendering with
# format_extra/3.
def create_extra_link(uri, opts) do
  attrs = build_attrs([], uri, opts, :class)
  format_extra(attrs, uri, opts)
end
# Renders `<a ATTRS>@name</a>`, where ATTRS is whatever format_attrs/1 returns
# for `attrs`. `name` is the mention without its leading "@".
def format_mention(attrs, name, _opts) do
  opening_tag = "<a #{format_attrs(attrs)}>"
  opening_tag <> "@" <> name <> "</a>"
end
# Renders `<a ATTRS>#tag</a>`, where ATTRS is whatever format_attrs/1 returns
# for `attrs`. `tag` is the hashtag without its leading "#".
def format_hashtag(attrs, tag, _opts) do
  opening_tag = "<a #{format_attrs(attrs)}>"
  opening_tag <> "#" <> tag <> "</a>"
end
# Renders a `mailto:` anchor whose visible text is the address itself. The
# address is interpolated as given; no escaping is applied here.
def format_email(attrs, email, _opts) do
  rendered_attrs = format_attrs(attrs)
  href = "mailto:#{email}"
  "<a href='#{href}' #{rendered_attrs}>#{email}</a>"
end
# Renders an anchor whose href and visible text are both `uri`. The URI is
# interpolated as given; no escaping is applied here.
def format_extra(attrs, uri, _opts) do
  rendered_attrs = format_attrs(attrs)
  href = "#{uri}"
  "<a href='#{href}' #{rendered_attrs}>#{uri}</a>"
end
defp format_attributes(attrs) do
Enum.reduce(attrs, "", fn {name, value}, acc ->
acc <> ~s' #{name}="#{value}"'

View file

@ -28,19 +28,43 @@ defmodule AutoLinker.Parser do
# @invalid_url ~r/\.\.+/
# Rejects buffers that contain two or more consecutive dots, or that consist
# solely of two or three dot-separated groups of digits (e.g. "1.2" or "1.2.3").
@invalid_url ~r/(\.\.+)|(^(\d+\.){1,2}\d+$)/
# NOTE(review): @match_url and @match_scheme are each assigned twice below. The
# second pair differs from the first only by also allowing "%" in the trailing
# character class; a later assignment replaces the earlier one for subsequent
# reads. This looks like both sides of a diff hunk — confirm the first pair
# should be dropped.
@match_url ~r{^[\w\.-]+(?:\.[\w\.-]+)+[\w\-\._~:/?#[\]@!\$&'\(\)\*\+,;=.]+$}
@match_scheme ~r{^(?:http(s)?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~:/?#[\]@!\$&'\(\)\*\+,;=.]+$}
# Whole-buffer match for a dotted host followed by path/query/fragment
# characters, without a scheme.
@match_url ~r{^[\w\.-]+(?:\.[\w\.-]+)+[\w\-\._~%:/?#[\]@!\$&'\(\)\*\+,;=.]+$}
# Same as @match_url but with an optional leading "http://" or "https://".
@match_scheme ~r{^(?:http(s)?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~%:/?#[\]@!\$&'\(\)\*\+,;=.]+$}
# Matches either an "x"-prefixed run of 2-7 digits or a ten-digit number with an
# optional "+1"/"1" prefix and optional parentheses, spaces, dots or dashes
# between groups. Appears to target North American numbering — TODO confirm.
@match_phone ~r"((?:x\d{2,7})|(?:(?:\+?1\s?(?:[.-]\s?)?)?(?:\(\s?(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s?\)|(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s?(?:[.-]\s?)?)(?:[2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s?(?:[.-]\s?)?(?:[0-9]{4}))"
# Captures `host`: skips an optional http(s) scheme and an optional "...@"
# userinfo part, then takes everything up to the first ":", "#", "~", "/", "?"
# or newline.
@match_hostname ~r{^(?:https?:\/\/)?(?:[^@\n]+@)?(?<host>[^:#~\/\n?]+)}
# Captures `host` from a URL or address: skips an optional http(s) scheme and
# an optional userinfo part, then takes everything up to the first ":", "#",
# "~", "/", "?" or newline.
#
# The userinfo part must end in a word character directly before "@". That
# keeps a path segment such as "/@user" from being mistaken for userinfo, while
# "user@example.com" still yields the host "example.com". Inside `~r{}` a
# doubled backslash is a literal backslash, so the class has to be spelled `\w`.
@match_hostname ~r{^(?:https?:\/\/)?(?:[^@\n]+\w@)?(?<host>[^:#~\/\n?]+)}
# Whole-buffer match for four dot-separated numbers, each in the range 0-255.
@match_ip ~r"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"
# NOTE(review): @default_opts is assigned here and again, with the same value,
# at the end of this attribute list — confirm which one should remain.
@default_opts ~w(url)a
# Mentions. Anchored at the start of the buffer only, so characters may follow
# the matched mention. Accepted shapes:
# @user
# @user@example.com
@match_mention ~r/^@[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@?[a-zA-Z0-9_-](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*/u
# E-mail addresses; the whole buffer has to match. Pattern source:
# https://www.w3.org/TR/html5/forms.html#valid-e-mail-address
@match_email ~r/^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/u
# "#" followed by one or more word characters, captured as `tag`. Anchored at
# the start of the buffer only; the `u` flag enables Unicode matching.
@match_hashtag ~r/^\#(?<tag>\w+)/u
# Prefixes that mark a buffer as an "extra" (non-http) link.
@prefix_extra [
"magnet:?",
"dweb://",
"dat://",
"gopher://",
"ipfs://",
"ipns://",
"irc://",
"ircs://",
"irc6://",
"mumble://",
"ssb://"
]
# One entry per line of priv/tlds.txt. Module attributes are evaluated at
# compile time, so the file is read then (relative path) and File.read!/1
# raises if it is missing.
@tlds "./priv/tlds.txt" |> File.read!() |> String.trim() |> String.split("\n")
@default_opts ~w(url)a
# Function head declaring the default: `opts` is an empty map when omitted.
def parse(text, opts \\ %{})
# Options given as a keyword list are converted to a map, then parse/2 is
# called again with that map.
def parse(text, list) when is_list(list), do: parse(text, Enum.into(list, %{}))
@ -75,12 +99,30 @@ defmodule AutoLinker.Parser do
|> do_parse(Map.delete(opts, :phone))
end
# Each clause below handles one enabled option: it runs its pass over the text,
# removes that option from `opts`, and recurses so the remaining options are
# applied to the result. Clause order is significant.

# Mention pass.
defp do_parse(text, %{mention: true} = opts) do
  linked = do_parse(text, false, opts, {"", "", :parsing}, &check_and_link_mention/3)
  do_parse(linked, Map.delete(opts, :mention))
end

# Extra (non-http scheme) pass.
defp do_parse(text, %{extra: true} = opts) do
  linked = do_parse(text, false, opts, {"", "", :parsing}, &check_and_link_extra/3)
  do_parse(linked, Map.delete(opts, :extra))
end

# Markdown pass; delegated entirely to the builder.
defp do_parse(text, %{markdown: true} = opts) do
  linked = Builder.create_markdown_links(text, opts)
  do_parse(linked, Map.delete(opts, :markdown))
end

# E-mail pass.
defp do_parse(text, %{email: true} = opts) do
  linked = do_parse(text, false, opts, {"", "", :parsing}, &check_and_link_email/3)
  do_parse(linked, Map.delete(opts, :email))
end
defp do_parse(text, %{url: _} = opts) do
if (exclude = Map.get(opts, :exclude_pattern, false)) && String.starts_with?(text, exclude) do
text
@ -90,6 +132,12 @@ defmodule AutoLinker.Parser do
|> do_parse(Map.delete(opts, :url))
end
# Hashtag pass: link hashtags, drop the option, and continue with whatever
# options remain.
defp do_parse(text, %{hashtag: true} = opts) do
  linked = do_parse(text, false, opts, {"", "", :parsing}, &check_and_link_hashtag/3)
  do_parse(linked, Map.delete(opts, :hashtag))
end

# No earlier clause matched the options: return the text as it is.
defp do_parse(text, _opts), do: text
defp do_parse("", _scheme, _opts, {"", acc, _}, _handler),
@ -110,8 +158,9 @@ defmodule AutoLinker.Parser do
# In the {:attrs, level} state a ">" ends the attribute section: the pending
# buffer and the ">" are appended to the accumulator, the buffer is cleared and
# the state becomes {:html, level}.
defp do_parse(">" <> text, scheme, opts, {buffer, acc, {:attrs, level}}, handler),
do: do_parse(text, scheme, opts, {"", acc <> buffer <> ">", {:html, level}}, handler)
# In the {:attrs, level} state with an empty buffer, any other byte is copied
# straight to the accumulator and the state is kept.
# NOTE(review): the next two clauses have identical heads and bodies (one-line
# `do:` form, then `do ... end` form), so the second can never be reached. This
# looks like both sides of a diff hunk — confirm which one should remain.
defp do_parse(<<ch::8>> <> text, scheme, opts, {"", acc, {:attrs, level}}, handler),
do: do_parse(text, scheme, opts, {"", acc <> <<ch::8>>, {:attrs, level}}, handler)
defp do_parse(<<ch::8>> <> text, scheme, opts, {"", acc, {:attrs, level}}, handler) do
do_parse(text, scheme, opts, {"", acc <> <<ch::8>>, {:attrs, level}}, handler)
end
defp do_parse("</" <> text, scheme, opts, {buffer, acc, {:html, level}}, handler),
do:
@ -178,13 +227,43 @@ defmodule AutoLinker.Parser do
|> link_url(buffer, opts)
end
# Handler for the e-mail pass: the buffer becomes a mailto link when
# is_email?/1 accepts it, otherwise it is returned unchanged by link_email/3.
def check_and_link_email(buffer, _, opts) do
  email? = is_email?(buffer)
  link_email(email?, buffer, opts)
end
# Handler for the phone pass: hands the result of match_phone/1, together with
# the original buffer, to link_phone/3.
def check_and_link_phone(buffer, _, opts) do
  matched = match_phone(buffer)
  link_phone(matched, buffer, opts)
end
# Handler for the mention pass: hands the result of match_mention/1 (the
# matched mention or nil), together with the original buffer, to link_mention/3.
@doc false
def check_and_link_mention(buffer, _, opts) do
  mention = match_mention(buffer)
  link_mention(mention, buffer, opts)
end
# Handler for the hashtag pass: hands the result of match_hashtag/1 (the tag
# without "#", or nil), together with the original buffer, to link_hashtag/3.
def check_and_link_hashtag(buffer, _, opts) do
  hashtag = match_hashtag(buffer)
  link_hashtag(hashtag, buffer, opts)
end
# Handler for the extra-scheme pass.
#
# "xmpp:" buffers are linked only when the part after the prefix passes
# is_email?/1; the link itself covers the whole buffer, prefix included.
def check_and_link_extra("xmpp:" <> handle = buffer, _, opts) do
  valid_handle? = is_email?(handle)
  link_extra(valid_handle?, buffer, opts)
end

# Any other buffer is linked when it starts with one of the @prefix_extra
# entries.
def check_and_link_extra(buffer, _, opts) do
  known_prefix? = String.starts_with?(buffer, @prefix_extra)
  link_extra(known_prefix?, buffer, opts)
end
# @doc false
def is_url?(buffer, true) do
if Regex.match?(@invalid_url, buffer) do
false
@ -201,6 +280,14 @@ defmodule AutoLinker.Parser do
end
end
# Decides whether `buffer` should be treated as an e-mail address.
#
# Buffers matching @invalid_url are rejected outright. Otherwise the result of
# matching @match_email is passed on to is_valid_tld?/2, which makes the final
# decision.
def is_email?(buffer) do
  case Regex.match?(@invalid_url, buffer) do
    true ->
      false

    false ->
      looks_like_email? = Regex.match?(@match_email, buffer)
      is_valid_tld?(looks_like_email?, buffer)
  end
end
def is_valid_tld?(true, buffer) do
[host] = Regex.run(@match_hostname, buffer, capture: [:host])
@ -227,6 +314,37 @@ defmodule AutoLinker.Parser do
end
end
# Returns the part of `buffer` matched by @match_mention (which starts with
# "@"), or nil when the buffer does not start with a mention.
def match_mention(buffer) do
  with [mention] <- Regex.run(@match_mention, buffer) do
    mention
  else
    _ -> nil
  end
end
# Returns the `tag` capture of @match_hashtag (the hashtag without its leading
# "#"), or nil when the buffer does not start with a hashtag.
def match_hashtag(buffer) do
  with [hashtag] <- Regex.run(@match_hashtag, buffer, capture: [:tag]) do
    hashtag
  else
    _ -> nil
  end
end
# Turns a matched hashtag into a link.
#
# `hashtag` is the tag without its leading "#" (or nil when nothing matched, in
# which case the buffer is returned untouched). The hashtag pattern is anchored
# only at the start of the buffer, so the buffer may carry extra characters
# after the tag (e.g. the "," in "#tag,"). Those characters are appended after
# the generated link instead of being dropped.
def link_hashtag(nil, buffer, _), do: buffer

def link_hashtag(hashtag, buffer, opts) do
  hashtag
  |> Builder.create_hashtag_link(buffer, opts)
  |> restore_trailing_text(buffer, "#" <> hashtag)
end

# Turns a matched mention into a link.
#
# `mention` is the matched text including its leading "@" (or nil when nothing
# matched). When `opts` carries a :mention_formatter, that function is called
# with the mention and the options and must return a `{markup, _}` tuple;
# otherwise the builder produces the link. As with hashtags, any buffer text
# after the matched mention is kept and appended after the link.
def link_mention(nil, buffer, _), do: buffer

def link_mention(mention, buffer, %{mention_formatter: mention_formatter} = opts) do
  {link, _} = mention_formatter.(mention, opts)
  restore_trailing_text(link, buffer, mention)
end

def link_mention(mention, buffer, opts) do
  mention
  |> Builder.create_mention_link(buffer, opts)
  |> restore_trailing_text(buffer, mention)
end

# Appends whatever follows `matched` in `buffer` to `link`. When `matched` is
# the whole buffer (or is not a prefix of it) the link is returned unchanged,
# so callers that never had trailing text get exactly the previous result.
defp restore_trailing_text(link, buffer, matched) do
  matched_size = byte_size(matched)

  if byte_size(buffer) > matched_size and String.starts_with?(buffer, matched) do
    link <> binary_part(buffer, matched_size, byte_size(buffer) - matched_size)
  else
    link
  end
end
def link_phone(nil, buffer, _), do: buffer
def link_phone(list, buffer, opts) do
@ -239,4 +357,17 @@ defmodule AutoLinker.Parser do
end
def link_url(_, buffer, _opts), do: buffer
# Builds a mailto link for `buffer` when the first argument is `true`; any
# other value leaves the buffer unchanged.
@doc false
def link_email(matched, buffer, opts) do
  case matched do
    true -> Builder.create_email_link(buffer, opts)
    _ -> buffer
  end
end
# Builds an extra-scheme link for `buffer` when the first argument is `true`;
# any other value leaves the buffer unchanged.
def link_extra(matched, buffer, opts) do
  case matched do
    true -> Builder.create_extra_link(buffer, opts)
    _ -> buffer
  end
end
end

View file

@ -28,6 +28,213 @@ defmodule AutoLinkerTest do
" <a href='a.com' class='auto-linker' target='_blank' rel='noopener noreferrer'>ab</a>"
end
# One input that exercises every linker at once: mention, bare and schemed
# URLs, phone number, hashtag, e-mail, markdown link and an extra scheme.
test "all kinds of links" do
  input =
    "hello @user google.com https://ddg.com 888 888-8888 #tag user@email.com [google.com](http://google.com) irc:///mIRC"

  link_opts = [
    phone: true,
    markdown: true,
    email: true,
    mention: true,
    mention_prefix: "https://example.com/user/",
    hashtag: true,
    hashtag_prefix: "https://example.com/tag/",
    scheme: true,
    extra: true,
    class: false,
    new_window: false,
    rel: false
  ]

  html =
    "hello <a href='https://example.com/user/user'>@user</a> <a href='http://google.com'>google.com</a> <a href='https://ddg.com'>ddg.com</a> <a href=\"#\" class=\"phone-number\" data-phone=\"8888888888\">888 888-8888</a> <a href='https://example.com/tag/tag'>#tag</a> <a href='mailto:user@email.com' >user@email.com</a> <a href='http://google.com'>google.com</a> <a href='irc:///mIRC' >irc:///mIRC</a>"

  assert AutoLinker.link(input, link_opts) == html
end
# Mention linking: "@name" becomes a link to mention_prefix <> name.
describe "mentions" do
  test "simple mentions" do
    expected =
      ~s{hello <a href='https://example.com/user/user' class='auto-linker' target='_blank' rel='noopener noreferrer'>@user</a> and <a href='https://example.com/user/anotherUser' class='auto-linker' target='_blank' rel='noopener noreferrer'>@anotherUser</a>}

    assert AutoLinker.link("hello @user and @anotherUser",
             mention: true,
             mention_prefix: "https://example.com/user/"
           ) == expected
  end

  # A mention may carry a host part ("@user@example.com"); the whole handle
  # goes into the URL.
  test "mention @user@example.com" do
    text = "hey @user@example.com"

    expected =
      "hey <a href='https://example.com/user/user@example.com' class='auto-linker' target='_blank' rel='noopener noreferrer'>@user@example.com</a>"

    assert AutoLinker.link(text,
             mention: true,
             mention_prefix: "https://example.com/user/"
           ) == expected
  end

  test "skip if starts with @@" do
    text = "hello @@user and @anotherUser"

    expected =
      "hello @@user and <a href='https://example.com/user/anotherUser' class='auto-linker' target='_blank' rel='noopener noreferrer'>@anotherUser</a>"

    assert AutoLinker.link(text,
             mention: true,
             mention_prefix: "https://example.com/user/"
           ) == expected
  end
end
# Hashtag linking: "#tag" becomes a link to hashtag_prefix <> tag.
describe "hashtag links" do
  test "hashtag" do
    expected =
      "one <a href='https://example.com/tag/two' class='auto-linker' target='_blank' rel='noopener noreferrer'>#two</a> three <a href='https://example.com/tag/four' class='auto-linker' target='_blank' rel='noopener noreferrer'>#four</a>"

    assert AutoLinker.link("one #two three #four",
             hashtag: true,
             hashtag_prefix: "https://example.com/tag/"
           ) == expected
  end

  # A "#fragment" inside a URL belongs to the URL; only free-standing "#word"
  # tokens are hashtags.
  test "do not turn urls with hashes into hashtags" do
    text = "google.com#test #test google.com/#test #tag"

    expected =
      "<a href='http://google.com#test'>google.com#test</a> <a href='https://example.com/tag/test'>#test</a> <a href='http://google.com/#test'>google.com/#test</a> <a href='https://example.com/tag/tag'>#tag</a>"

    assert AutoLinker.link(text,
             scheme: true,
             hashtag: true,
             class: false,
             new_window: false,
             rel: false,
             hashtag_prefix: "https://example.com/tag/"
           ) == expected
  end

  test "works with non-latin characters" do
    # Four hashtags, one per expected link below.
    text = "#漢字 #は #тест #ทดสอบ"

    expected =
      "<a href='https://example.com/tag/漢字'>#漢字</a> <a href='https://example.com/tag/は'>#は</a> <a href='https://example.com/tag/тест'>#тест</a> <a href='https://example.com/tag/ทดสอบ'>#ทดสอบ</a>"

    assert AutoLinker.link(text,
             scheme: true,
             class: false,
             new_window: false,
             rel: false,
             hashtag: true,
             hashtag_prefix: "https://example.com/tag/"
           ) == expected
  end
end
# URL linking with `scheme: true`.
describe "links" do
  test "turning urls into links" do
    text = "Hey, check out http://www.youtube.com/watch?v=8Zg1-TufF%20zY?x=1&y=2#blabla ."

    expected =
      "Hey, check out <a href='http://www.youtube.com/watch?v=8Zg1-TufF%20zY?x=1&y=2#blabla' class='auto-linker' target='_blank' rel='noopener noreferrer'>youtube.com/watch?v=8Zg1-TufF%20zY?x=1&y=2#blabla</a> ."

    assert AutoLinker.link(text, scheme: true) == expected

    # no scheme
    text = "Hey, check out www.youtube.com/watch?v=8Zg1-TufF%20zY?x=1&y=2#blabla ."
    assert AutoLinker.link(text, scheme: true) == expected
  end

  test "hostname/@user" do
    # "@user" as a path segment is part of the URL.
    text = "https://example.com/@user"

    expected =
      "<a href='https://example.com/@user' class='auto-linker' target='_blank' rel='noopener noreferrer'>example.com/@user</a>"

    assert AutoLinker.link(text, scheme: true) == expected

    # Same, with an explicit port.
    text = "https://example.com:4000/@user"

    expected =
      "<a href='https://example.com:4000/@user' class='auto-linker' target='_blank' rel='noopener noreferrer'>example.com:4000/@user</a>"

    assert AutoLinker.link(text, scheme: true) == expected

    # A bare mention is left alone when only URL linking is enabled.
    text = "@username"
    expected = "@username"
    assert AutoLinker.link(text, scheme: true) == expected

    text = "http://www.cs.vu.nl/~ast/intel/"

    expected =
      "<a href='http://www.cs.vu.nl/~ast/intel/' class='auto-linker' target='_blank' rel='noopener noreferrer'>cs.vu.nl/~ast/intel/</a>"

    assert AutoLinker.link(text, scheme: true) == expected

    text = "https://forum.zdoom.org/viewtopic.php?f=44&t=57087"

    expected =
      "<a href='https://forum.zdoom.org/viewtopic.php?f=44&t=57087' class='auto-linker' target='_blank' rel='noopener noreferrer'>forum.zdoom.org/viewtopic.php?f=44&t=57087</a>"

    assert AutoLinker.link(text, scheme: true) == expected

    text = "https://en.wikipedia.org/wiki/Sophia_(Gnosticism)#Mythos_of_the_soul"

    expected =
      "<a href='https://en.wikipedia.org/wiki/Sophia_(Gnosticism)#Mythos_of_the_soul' class='auto-linker' target='_blank' rel='noopener noreferrer'>en.wikipedia.org/wiki/Sophia_(Gnosticism)#Mythos_of_the_soul</a>"

    assert AutoLinker.link(text, scheme: true) == expected

    text = "https://en.wikipedia.org/wiki/Duff's_device"

    expected =
      "<a href='https://en.wikipedia.org/wiki/Duff's_device' class='auto-linker' target='_blank' rel='noopener noreferrer'>en.wikipedia.org/wiki/Duff's_device</a>"

    assert AutoLinker.link(text, scheme: true) == expected
  end
end
# Links that do not use http(s): xmpp, mailto, magnet and dweb. Apart from
# e-mail (which gets a "mailto:" href) the href and the visible text are the
# input itself.
describe "non http links" do
  test "xmpp" do
    input = "xmpp:user@example.com"
    html = "<a href='xmpp:user@example.com' class='auto-linker'>xmpp:user@example.com</a>"

    assert html == AutoLinker.link(input, extra: true)
  end

  test "email" do
    input = "user@example.com"
    html = "<a href='mailto:user@example.com' class='auto-linker'>user@example.com</a>"

    assert html == AutoLinker.link(input, email: true)
  end

  test "magnet" do
    input =
      "magnet:?xt=urn:btih:a4104a9d2f5615601c429fe8bab8177c47c05c84&dn=ubuntu-18.04.1.0-live-server-amd64.iso&tr=http%3A%2F%2Ftorrent.ubuntu.com%3A6969%2Fannounce&tr=http%3A%2F%2Fipv6.torrent.ubuntu.com%3A6969%2Fannounce"

    html =
      "<a href='magnet:?xt=urn:btih:a4104a9d2f5615601c429fe8bab8177c47c05c84&dn=ubuntu-18.04.1.0-live-server-amd64.iso&tr=http%3A%2F%2Ftorrent.ubuntu.com%3A6969%2Fannounce&tr=http%3A%2F%2Fipv6.torrent.ubuntu.com%3A6969%2Fannounce' class='auto-linker'>magnet:?xt=urn:btih:a4104a9d2f5615601c429fe8bab8177c47c05c84&dn=ubuntu-18.04.1.0-live-server-amd64.iso&tr=http%3A%2F%2Ftorrent.ubuntu.com%3A6969%2Fannounce&tr=http%3A%2F%2Fipv6.torrent.ubuntu.com%3A6969%2Fannounce</a>"

    assert html == AutoLinker.link(input, extra: true)
  end

  test "dweb" do
    input =
      "dweb://584faa05d394190ab1a3f0240607f9bf2b7e2bd9968830a11cf77db0cea36a21+v1.0.0/path/to/file.txt"

    html =
      "<a href='dweb://584faa05d394190ab1a3f0240607f9bf2b7e2bd9968830a11cf77db0cea36a21+v1.0.0/path/to/file.txt' class='auto-linker'>dweb://584faa05d394190ab1a3f0240607f9bf2b7e2bd9968830a11cf77db0cea36a21+v1.0.0/path/to/file.txt</a>"

    assert html == AutoLinker.link(input, extra: true)
  end
end
describe "TLDs" do
test "parse with scheme" do
text = "https://google.com"