From 26fa3bffeba86d527cd9ad7bc320e76e150cd1a4 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 26 Jan 2021 18:30:35 -0600 Subject: [PATCH 1/8] Try to make intelligent decisions when deciding if parens should be stripped before linking This logic is torture and needs some rework. Rules: - Always strip leading (, as it can't be part of a URL - Short circuit to only strip leading if no trailing - If valid email address when trailing ) stripped, we can strip trailing ) - If not even a valid URL without trailing ), short circuit to only strip leading - If query parameters detected, strip trailing. It should have been encoded as %29. - If there isn't a / the trailing ) can't be part of the URL, strip trailing. - If there isn't at least one ( in the URI.path, only strip leading. Assume ) is not part of the URL. - If we have an equal count of ( and ) chars with the leading ( already stripped, only strip leading --- lib/linkify/parser.ex | 24 +++++++++++++++++++----- test/linkify_test.exs | 9 +++++++++ 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex index 325b69c..73cc8ad 100644 --- a/lib/linkify/parser.ex +++ b/lib/linkify/parser.ex @@ -201,10 +201,24 @@ defmodule Linkify.Parser do if String.starts_with?(buffer, @prefix_extra), do: link_extra(buffer, opts), else: :nomatch end - defp strip_parens(buffer) do - buffer - |> String.trim_leading("(") - |> String.trim_trailing(")") + defp maybe_strip_parens(buffer) do + with buffer = String.trim_leading(buffer, "("), + true <- String.ends_with?(buffer, ")"), + false <- buffer |> String.trim_trailing(")") |> email?(nil), + true <- buffer |> String.trim_trailing(")") |> url?(nil), + %{path: path, query: query} = URI.parse(buffer), + false <- not is_nil(query), + false <- not String.contains?(path, "/"), + false <- not String.contains?(path, "("), + graphemes = String.graphemes(buffer), + openidx = graphemes |> Enum.find_index(fn x -> x == "(" end), + closeidx = graphemes |> Enum.find_index(fn x -> x == ")" end), + true <- openidx < closeidx do + buffer + else + false -> buffer |> String.trim_leading("(") + true -> buffer |> String.trim_leading("(") |> String.trim_trailing(")") + end end defp strip_punctuation(buffer), do: String.replace(buffer, @delimiters, "") @@ -368,7 +382,7 @@ defmodule Linkify.Parser do |> String.split("<") |> List.first() |> strip_punctuation() - |> strip_parens() + |> maybe_strip_parens() case check_and_link(type, str, opts, user_acc) do :nomatch -> diff --git a/test/linkify_test.exs b/test/linkify_test.exs index 64489c1..0225ade 100644 --- a/test/linkify_test.exs +++ b/test/linkify_test.exs @@ -776,5 +776,14 @@ defmodule LinkifyTest do assert Linkify.link(text) == expected end + + test "URLs with last character is closing paren" do + text = "Have you read https://en.wikipedia.org/wiki/Frame_(networking)?" + + expected = + "Have you read https://en.wikipedia.org/wiki/Frame_(networking)?" + + assert Linkify.link(text) == expected + end end end From aec97fac71a3035c92f947c4bcfa1aa453651239 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 26 Jan 2021 19:03:29 -0600 Subject: [PATCH 2/8] Split checks into functions returning atoms to make the decisions --- lib/linkify/parser.ex | 52 ++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex index 73cc8ad..554eb03 100644 --- a/lib/linkify/parser.ex +++ b/lib/linkify/parser.ex @@ -202,22 +202,44 @@ defmodule Linkify.Parser do end defp maybe_strip_parens(buffer) do - with buffer = String.trim_leading(buffer, "("), - true <- String.ends_with?(buffer, ")"), - false <- buffer |> String.trim_trailing(")") |> email?(nil), - true <- buffer |> String.trim_trailing(")") |> url?(nil), - %{path: path, query: query} = URI.parse(buffer), - false <- not is_nil(query), - false <- not String.contains?(path, "/"), - false <- not String.contains?(path, "("), - graphemes = String.graphemes(buffer), - openidx = graphemes |> Enum.find_index(fn x -> x == "(" end), - closeidx = graphemes |> Enum.find_index(fn x -> x == ")" end), - true <- openidx < closeidx do - buffer + trimmed = String.trim_leading(buffer, "(") + + with :next <- parens_check_trailing(buffer), + :next <- parens_found_email(trimmed), + :next <- parens_found_url(trimmed), + %{path: path, query: query} = URI.parse(trimmed), + :next <- parens_in_query(query), + :next <- parens_found_path_separator(path), + :next <- parens_path_has_open_paren(path), + :next <- parens_check_balanced(trimmed) do + buffer |> String.trim_leading("(") |> String.trim_trailing(")") else - false -> buffer |> String.trim_leading("(") - true -> buffer |> String.trim_leading("(") |> String.trim_trailing(")") + :both -> buffer |> String.trim_leading("(") |> String.trim_trailing(")") + :noop -> buffer + _ -> buffer + end + end + + defp parens_check_trailing(buffer), do: (String.ends_with?(buffer, ")") && :next) || :noop + + defp parens_found_email(trimmed), + do: (String.trim_trailing(trimmed, ")") |> email?(nil) && :both) || :next + + defp parens_found_url(trimmed), + do: (String.trim_trailing(trimmed, ")") |> url?(nil) && :next) || :noop + + defp parens_in_query(query), do: (is_nil(query) && :next) || :both + defp parens_found_path_separator(path), do: (String.contains?(path, "/") && :next) || :both + defp parens_path_has_open_paren(path), do: (String.contains?(path, "(") && :next) || :both + + defp parens_check_balanced(trimmed) do + graphemes = String.graphemes(trimmed) + opencnt = graphemes |> Enum.count(fn x -> x == "(" end) + closecnt = graphemes |> Enum.count(fn x -> x == ")" end) + + cond do + opencnt == closecnt -> :leading_only + true -> :next end end From a7e86d3381b8c4ff91b0d518d516edb8c604067f Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 27 Jan 2021 14:42:05 -0600 Subject: [PATCH 3/8] Credo --- lib/linkify/parser.ex | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex index 554eb03..71ac58d 100644 --- a/lib/linkify/parser.ex +++ b/lib/linkify/parser.ex @@ -237,9 +237,10 @@ defmodule Linkify.Parser do opencnt = graphemes |> Enum.count(fn x -> x == "(" end) closecnt = graphemes |> Enum.count(fn x -> x == ")" end) - cond do - opencnt == closecnt -> :leading_only - true -> :next + if opencnt == closecnt do + :leading_only + else + :next end end From 14e1f64a09c5328ef7477beb9d45392d85e73c70 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 27 Jan 2021 14:58:39 -0600 Subject: [PATCH 4/8] Missing operation for only trimming leading --- lib/linkify/parser.ex | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex index 71ac58d..1e5fd4b 100644 --- a/lib/linkify/parser.ex +++ b/lib/linkify/parser.ex @@ -215,6 +215,7 @@ defmodule Linkify.Parser do buffer |> String.trim_leading("(") |> String.trim_trailing(")") else :both -> buffer |> String.trim_leading("(") |> String.trim_trailing(")") + :leading_only -> buffer |> String.trim_leading("(") :noop -> buffer _ -> buffer end From 75f68977af92834f18675c4ca103a5cefbe38578 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 27 Jan 2021 15:49:42 -0600 Subject: [PATCH 5/8] Add own trim functions that only strips one character This makes it possible to support URLs that end in ) but are in another () --- lib/linkify/parser.ex | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex index 1e5fd4b..30d47ee 100644 --- a/lib/linkify/parser.ex +++ b/lib/linkify/parser.ex @@ -202,7 +202,7 @@ defmodule Linkify.Parser do end defp maybe_strip_parens(buffer) do - trimmed = String.trim_leading(buffer, "(") + trimmed = trim_leading_paren(buffer) with :next <- parens_check_trailing(buffer), :next <- parens_found_email(trimmed), @@ -212,10 +212,10 @@ defmodule Linkify.Parser do :next <- parens_found_path_separator(path), :next <- parens_path_has_open_paren(path), :next <- parens_check_balanced(trimmed) do - buffer |> String.trim_leading("(") |> String.trim_trailing(")") + buffer |> trim_leading_paren |> trim_trailing_paren else - :both -> buffer |> String.trim_leading("(") |> String.trim_trailing(")") - :leading_only -> buffer |> String.trim_leading("(") + :both -> buffer |> trim_leading_paren |> trim_trailing_paren + :leading_only -> buffer |> trim_leading_paren :noop -> buffer _ -> buffer end @@ -224,10 +224,10 @@ defmodule Linkify.Parser do defp parens_check_trailing(buffer), do: (String.ends_with?(buffer, ")") && :next) || :noop defp parens_found_email(trimmed), - do: (String.trim_trailing(trimmed, ")") |> email?(nil) && :both) || :next + do: (trim_trailing_paren(trimmed) |> email?(nil) && :both) || :next defp parens_found_url(trimmed), - do: (String.trim_trailing(trimmed, ")") |> url?(nil) && :next) || :noop + do: (trim_trailing_paren(trimmed) |> url?(nil) && :next) || :noop defp parens_in_query(query), do: (is_nil(query) && :next) || :both defp parens_found_path_separator(path), do: (String.contains?(path, "/") && :next) || :both @@ -245,6 +245,16 @@ defmodule Linkify.Parser do end end + defp trim_leading_paren(buffer), + do: + (String.starts_with?(buffer, "(") && String.slice(buffer, 1, String.length(buffer))) || + buffer + + defp trim_trailing_paren(buffer), + do: + (String.ends_with?(buffer, ")") && String.slice(buffer, 0, String.length(buffer) - 1)) || + buffer + defp strip_punctuation(buffer), do: String.replace(buffer, @delimiters, "") def url?(buffer, opts) do From 5191250de4173ec51df108910eb5af94469cfd93 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Thu, 28 Jan 2021 09:53:16 -0600 Subject: [PATCH 6/8] Document improved URL detection in parentheticals, target 0.5.0 release because it's a fairly big change. --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 095ed6e..159231c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## 0.5.0 - 2021-XX-XX + +### Added + +- More robust detection of URLs inside a parenthetical + ## 0.4.1 - 2020-12-21 ### Fixed From fb88baabacebe1b3c5692d73dc4c7997cc6ce586 Mon Sep 17 00:00:00 2001 From: feld Date: Thu, 28 Jan 2021 17:16:39 +0000 Subject: [PATCH 7/8] Apply 1 suggestion(s) to 1 file(s) --- lib/linkify/parser.ex | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex index 30d47ee..5791edf 100644 --- a/lib/linkify/parser.ex +++ b/lib/linkify/parser.ex @@ -245,10 +245,14 @@ defmodule Linkify.Parser do end end - defp trim_leading_paren(buffer), - do: - (String.starts_with?(buffer, "(") && String.slice(buffer, 1, String.length(buffer))) || - buffer + defp trim_leading_paren(buffer) do + if String.starts_with?(buffer, "(") do + "(" <> buffer = buffer + buffer + else + buffer + end + end defp trim_trailing_paren(buffer), do: From 1bc845f9e484a6e04d6b7696cb4ce2970c3773aa Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Thu, 28 Jan 2021 12:45:07 -0600 Subject: [PATCH 8/8] Small optimization --- lib/linkify/parser.ex | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex index 5791edf..a6c5f3a 100644 --- a/lib/linkify/parser.ex +++ b/lib/linkify/parser.ex @@ -246,11 +246,9 @@ defmodule Linkify.Parser do end defp trim_leading_paren(buffer) do - if String.starts_with?(buffer, "(") do - "(" <> buffer = buffer - buffer - else - buffer + case buffer do + "(" <> buffer -> buffer + buffer -> buffer end end