Try to make intelligent decisions when deciding if parens should be stripped before linking

This logic is torture and needs some rework.

Rules:

- Always strip leading (, as it can't be part of a URL
- Short circuit to only strip leading ( if there is no trailing )
- If valid email address when trailing ) stripped, we can strip trailing )
- If not even a valid URL without trailing ), short circuit to only strip leading
- If query parameters detected, strip trailing. It should have been encoded as %29.
- If there isn't a / the trailing ) can't be part of the URL, strip trailing.
- If there isn't at least one ( in the URI.path, strip trailing. Assume ) is not part of the URL.
- If we have an equal count of ( and ) chars with the leading ( already stripped, only strip leading
This commit is contained in:
Mark Felder 2021-01-26 18:30:35 -06:00
parent bd7a759911
commit 26fa3bffeb
2 changed files with 28 additions and 5 deletions

View file

@ -201,10 +201,24 @@ defmodule Linkify.Parser do
if String.starts_with?(buffer, @prefix_extra), do: link_extra(buffer, opts), else: :nomatch
end
defp strip_parens(buffer) do
buffer
|> String.trim_leading("(")
|> String.trim_trailing(")")
# Strips wrapping parentheses from `buffer` before it is checked for a link.
#
# Leading "(" characters are always removed. Trailing ")" characters are
# removed as well, except when:
#
#   * the buffer has no trailing ")" to begin with,
#   * `url?/2` rejects the buffer even once the trailing ")" is removed, or
#   * the ")" is assumed to be part of the URL path
#     (see `trailing_paren_in_path?/1`).
#
# A buffer that `email?/2` accepts once the trailing ")" is removed always has
# the trailing ")" removed.
#
# Returns the stripped binary.
defp maybe_strip_parens(buffer) do
  leading_stripped = String.trim_leading(buffer, "(")
  both_stripped = String.trim_trailing(leading_stripped, ")")

  cond do
    # Nothing to decide: there is no trailing ")".
    not String.ends_with?(leading_stripped, ")") -> leading_stripped
    email?(both_stripped, nil) -> both_stripped
    not url?(both_stripped, nil) -> leading_stripped
    trailing_paren_in_path?(leading_stripped) -> leading_stripped
    true -> both_stripped
  end
end

# Returns true when the trailing ")" of `buffer` is assumed to belong to the
# URL path: the parsed URI has no query string and its path contains both a
# "/" and a "(".
#
# NOTE(review): the previous implementation additionally compared the index of
# the first "(" with the index of the first ")", but both outcomes of that
# comparison returned the same leading-stripped buffer, so it is omitted here.
# A real balanced-parentheses check is not implemented.
defp trailing_paren_in_path?(buffer) do
  %{path: path, query: query} = URI.parse(buffer)

  # URI.parse/1 yields `path: nil` for URLs without a path component (e.g.
  # "http://example.com)"); treat that as an empty path instead of letting
  # String.contains?/2 raise on nil.
  path = path || ""

  is_nil(query) and String.contains?(path, "/") and String.contains?(path, "(")
end
# Removes every match of the module's `@delimiters` pattern from `buffer`.
defp strip_punctuation(buffer) do
  String.replace(buffer, @delimiters, "")
end
@ -368,7 +382,7 @@ defmodule Linkify.Parser do
|> String.split("<")
|> List.first()
|> strip_punctuation()
|> strip_parens()
|> maybe_strip_parens()
case check_and_link(type, str, opts, user_acc) do
:nomatch ->

View file

@ -776,5 +776,14 @@ defmodule LinkifyTest do
assert Linkify.link(text) == expected
end
# A ")" that closes a "(" inside the URL path must stay part of the link,
# while the "?" that follows it is left outside the anchor.
test "URLs with last character is closing paren" do
  input = "Have you read https://en.wikipedia.org/wiki/Frame_(networking)?"

  linked =
    "Have you read <a href=\"https://en.wikipedia.org/wiki/Frame_(networking)\">https://en.wikipedia.org/wiki/Frame_(networking)</a>?"

  assert Linkify.link(input) == linked
end
end
end