Merge branch 'fix/parens' into 'master'

Improve parsing of links inside parentheses, plus some refactoring

Closes #5

See merge request pleroma/auto_linker!16
This commit is contained in:
Egor 2019-06-12 08:35:39 +00:00
commit e2385402bc
2 changed files with 55 additions and 57 deletions

View file

@ -259,27 +259,33 @@ defmodule AutoLinker.Parser do
defp do_parse({<<ch::8>> <> text, user_acc}, opts, {buffer, acc, state}, handler), defp do_parse({<<ch::8>> <> text, user_acc}, opts, {buffer, acc, state}, handler),
do: do_parse({text, user_acc}, opts, {buffer <> <<ch::8>>, acc, state}, handler) do: do_parse({text, user_acc}, opts, {buffer <> <<ch::8>>, acc, state}, handler)
def check_and_link(buffer, %{scheme: true} = opts, _user_acc) do def check_and_link(buffer, opts, _user_acc) do
if is_url?(buffer, opts[:scheme]) do str = strip_parens(buffer)
case Regex.run(@match_scheme, buffer, capture: [:url]) do
[^buffer] -> link_url(true, buffer, opts) if url?(str, opts[:scheme]) do
[url] -> String.replace(buffer, url, link_url(true, url, opts)) case parse_link(str, opts) do
^buffer -> link_url(buffer, opts)
url -> String.replace(buffer, url, link_url(url, opts))
end end
else else
buffer buffer
end end
end end
def check_and_link(buffer, opts, _user_acc) do defp parse_link(str, %{scheme: true}) do
buffer @match_scheme |> Regex.run(str, capture: [:url]) |> hd()
|> is_url?(opts[:scheme])
|> link_url(buffer, opts)
end end
defp parse_link(str, _), do: str
defp strip_parens("(" <> buffer) do
~r/[^\)]*/ |> Regex.run(buffer) |> hd()
end
defp strip_parens(buffer), do: buffer
def check_and_link_email(buffer, opts, _user_acc) do def check_and_link_email(buffer, opts, _user_acc) do
buffer if email?(buffer), do: link_email(buffer, opts), else: buffer
|> is_email?
|> link_email(buffer, opts)
end end
def check_and_link_phone(buffer, opts, _user_acc) do def check_and_link_phone(buffer, opts, _user_acc) do
@ -301,45 +307,31 @@ defmodule AutoLinker.Parser do
end end
def check_and_link_extra("xmpp:" <> handle, opts, _user_acc) do def check_and_link_extra("xmpp:" <> handle, opts, _user_acc) do
handle if email?(handle), do: link_extra("xmpp:" <> handle, opts), else: handle
|> is_email?
|> link_extra("xmpp:" <> handle, opts)
end end
def check_and_link_extra(buffer, opts, _user_acc) do def check_and_link_extra(buffer, opts, _user_acc) do
buffer if String.starts_with?(buffer, @prefix_extra), do: link_extra(buffer, opts), else: buffer
|> String.starts_with?(@prefix_extra)
|> link_extra(buffer, opts)
end end
# @doc false # @doc false
def is_url?(buffer, true) do def url?(buffer, true) do
if Regex.match?(@invalid_url, buffer) do valid_url?(buffer) && Regex.match?(@match_scheme, buffer) && valid_tld?(buffer)
false
else
@match_scheme |> Regex.match?(buffer) |> is_valid_tld?(buffer)
end
end end
def is_url?(buffer, _) do def url?(buffer, _) do
if Regex.match?(@invalid_url, buffer) do valid_url?(buffer) && Regex.match?(@match_url, buffer) && valid_tld?(buffer)
false
else
@match_url |> Regex.match?(buffer) |> is_valid_tld?(buffer)
end
end end
def is_email?(buffer) do def email?(buffer) do
if Regex.match?(@invalid_url, buffer) do valid_url?(buffer) && Regex.match?(@match_email, buffer) && valid_tld?(buffer)
false
else
@match_email |> Regex.match?(buffer) |> is_valid_tld?(buffer)
end
end end
def is_valid_tld?(true, buffer) do defp valid_url?(url), do: !Regex.match?(@invalid_url, url)
def valid_tld?(buffer) do
with [host] <- Regex.run(@match_hostname, buffer, capture: [:host]) do with [host] <- Regex.run(@match_hostname, buffer, capture: [:host]) do
if is_ip?(host) do if ip?(host) do
true true
else else
tld = host |> String.split(".") |> List.last() tld = host |> String.split(".") |> List.last()
@ -350,11 +342,7 @@ defmodule AutoLinker.Parser do
end end
end end
def is_valid_tld?(false, _), do: false def ip?(buffer), do: Regex.match?(@match_ip, buffer)
def is_ip?(buffer) do
Regex.match?(@match_ip, buffer)
end
@doc false @doc false
def match_phone(buffer) do def match_phone(buffer) do
@ -425,25 +413,19 @@ defmodule AutoLinker.Parser do
end end
@doc false @doc false
def link_url(true, buffer, opts) do def link_url(buffer, opts) do
Builder.create_link(buffer, opts) Builder.create_link(buffer, opts)
end end
def link_url(_, buffer, _opts), do: buffer
@doc false @doc false
def link_email(true, buffer, opts) do def link_email(buffer, opts) do
Builder.create_email_link(buffer, opts) Builder.create_email_link(buffer, opts)
end end
def link_email(_, buffer, _opts), do: buffer def link_extra(buffer, opts) do
def link_extra(true, buffer, opts) do
Builder.create_extra_link(buffer, opts) Builder.create_extra_link(buffer, opts)
end end
def link_extra(_, buffer, _opts), do: buffer
defp run_handler(handler, buffer, opts, user_acc) do defp run_handler(handler, buffer, opts, user_acc) do
case handler.(buffer, opts, user_acc) do case handler.(buffer, opts, user_acc) do
{buffer, user_acc} -> {buffer, user_acc} {buffer, user_acc} -> {buffer, user_acc}

View file

@ -4,32 +4,32 @@ defmodule AutoLinker.ParserTest do
import AutoLinker.Parser import AutoLinker.Parser
describe "is_url" do describe "url?/2" do
test "valid scheme true" do test "valid scheme true" do
valid_scheme_urls() valid_scheme_urls()
|> Enum.each(fn url -> |> Enum.each(fn url ->
assert is_url?(url, true) assert url?(url, true)
end) end)
end end
test "invalid scheme true" do test "invalid scheme true" do
invalid_scheme_urls() invalid_scheme_urls()
|> Enum.each(fn url -> |> Enum.each(fn url ->
refute is_url?(url, true) refute url?(url, true)
end) end)
end end
test "valid scheme false" do test "valid scheme false" do
valid_non_scheme_urls() valid_non_scheme_urls()
|> Enum.each(fn url -> |> Enum.each(fn url ->
assert is_url?(url, false) assert url?(url, false)
end) end)
end end
test "invalid scheme false" do test "invalid scheme false" do
invalid_non_scheme_urls() invalid_non_scheme_urls()
|> Enum.each(fn url -> |> Enum.each(fn url ->
refute is_url?(url, false) refute url?(url, false)
end) end)
end end
end end
@ -106,6 +106,22 @@ defmodule AutoLinker.ParserTest do
assert parse(text, exclude_patterns: ["```"]) == text assert parse(text, exclude_patterns: ["```"]) == text
end end
test "do not link parens" do
text = " foo (https://example.com/path/folder/), bar"
expected =
" foo (<a href=\"https://example.com/path/folder/\">example.com/path/folder/</a>), bar"
assert parse(text, class: false, rel: false, new_window: false, scheme: true) == expected
text = " foo (example.com/path/folder/), bar"
expected =
" foo (<a href=\"http://example.com/path/folder/\">example.com/path/folder/</a>), bar"
assert parse(text, class: false, rel: false, new_window: false) == expected
end
test "do not link urls" do test "do not link urls" do
text = "google.com" text = "google.com"
assert parse(text, url: false, phone: true) == text assert parse(text, url: false, phone: true) == text