Merge branch 'improve-parsing' into 'master'

Improve Parser

See merge request pleroma/auto_linker!10
This commit is contained in:
Egor 2019-04-09 08:05:29 +00:00
commit a3ec8eb797
5 changed files with 114 additions and 42 deletions

View file

@ -82,7 +82,7 @@ defmodule AutoLinker.Builder do
defp truncate(url, len) when len < 3, do: url
defp truncate(url, len) do
if String.length(url) > len, do: String.slice(url, 0, len - 2) <> "..", else: url
if String.length(url) > len, do: String.slice(url, 0, len - 2) <> "...", else: url
end
defp strip_prefix(url, true) do

View file

@ -5,31 +5,11 @@ defmodule AutoLinker.Parser do
alias AutoLinker.Builder
@doc """
Parse the given string, identifying items to link.
Parses the string, replacing the matching urls and phone numbers with an html link.
## Examples
iex> AutoLinker.Parser.parse("Check out google.com")
~s{Check out <a href="http://google.com" class="auto-linker" target="_blank" rel="noopener noreferrer">google.com</a>}
iex> AutoLinker.Parser.parse("call me at x9999", phone: true)
~s{call me at <a href="#" class="phone-number" data-phone="9999">x9999</a>}
iex> AutoLinker.Parser.parse("or at home on 555.555.5555", phone: true)
~s{or at home on <a href="#" class="phone-number" data-phone="5555555555">555.555.5555</a>}
iex> AutoLinker.Parser.parse(", work (555) 555-5555", phone: true)
~s{, work <a href="#" class="phone-number" data-phone="5555555555">(555) 555-5555</a>}
"""
@invalid_url ~r/(\.\.+)|(^(\d+\.){1,2}\d+$)/
@match_url ~r{^[\w\.-]+(?:\.[\w\.-]+)+[\w\-\._~%:/?#[\]@!\$&'\(\)\*\+,;=.]+$}
@match_scheme ~r{^(?:\W*)?(?<url>(?:\W*https?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~%:\/?#[\]@!\$&'\(\)\*\+,;=.]+$)}u
@match_scheme ~r{^(?:\W*)?(?<url>(?:https?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~%:\/?#[\]@!\$&'\(\)\*\+,;=.]+$)}u
@match_phone ~r"((?:x\d{2,7})|(?:(?:\+?1\s?(?:[.-]\s?)?)?(?:\(\s?(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s?\)|(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s?(?:[.-]\s?)?)(?:[2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s?(?:[.-]\s?)?(?:[0-9]{4}))"
@ -64,6 +44,26 @@ defmodule AutoLinker.Parser do
@default_opts ~w(url)a
@doc """
Parse the given string, identifying items to link.
Parses the string, replacing the matching urls and phone numbers with an html link.
## Examples
iex> AutoLinker.Parser.parse("Check out google.com")
~s{Check out <a href="http://google.com" class="auto-linker" target="_blank" rel="noopener noreferrer">google.com</a>}
iex> AutoLinker.Parser.parse("call me at x9999", phone: true)
~s{call me at <a href="#" class="phone-number" data-phone="9999">x9999</a>}
iex> AutoLinker.Parser.parse("or at home on 555.555.5555", phone: true)
~s{or at home on <a href="#" class="phone-number" data-phone="5555555555">555.555.5555</a>}
iex> AutoLinker.Parser.parse(", work (555) 555-5555", phone: true)
~s{, work <a href="#" class="phone-number" data-phone="5555555555">(555) 555-5555</a>}
"""
def parse(input, opts \\ %{})
def parse(input, opts) when is_binary(input), do: {input, nil} |> parse(opts) |> elem(0)
def parse(input, list) when is_list(list), do: parse(input, Enum.into(list, %{}))
@ -154,20 +154,31 @@ defmodule AutoLinker.Parser do
defp do_parse({"", user_acc}, _opts, {"", acc, _}, _handler),
do: {acc, user_acc}
defp do_parse({"", user_acc}, opts, {buffer, acc, _}, handler) do
{buffer, user_acc} = run_handler(handler, buffer, opts, user_acc)
{acc <> buffer, user_acc}
end
defp do_parse({"<a" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler),
do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<a", :skip}, handler)
defp do_parse({"<pre" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler),
do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<pre", :skip}, handler)
defp do_parse({"<code" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler),
do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<code", :skip}, handler)
defp do_parse({"</a>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler),
do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</a>", :parsing}, handler)
defp do_parse({"</pre>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler),
do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</pre>", :parsing}, handler)
defp do_parse({"</code>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler),
do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</code>", :parsing}, handler)
defp do_parse({"<" <> text, user_acc}, opts, {"", acc, :parsing}, handler),
do: do_parse({text, user_acc}, opts, {"<", acc, {:open, 1}}, handler)
defp do_parse({"<" <> text, user_acc}, opts, {"", acc, {:html, level}}, handler) do
do_parse({text, user_acc}, opts, {"<", acc, {:open, level + 1}}, handler)
end
defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:attrs, level}}, handler),
do:
do_parse(
@ -204,19 +215,8 @@ defmodule AutoLinker.Parser do
handler
)
defp do_parse(
{<<char::bytes-size(1), text::binary>>, user_acc},
opts,
{buffer, acc, {:open, level}},
handler
)
when char in [" ", "\r", "\n"] do
do_parse(
{text, user_acc},
opts,
{"", acc <> buffer <> char, {:attrs, level}},
handler
)
defp do_parse({text, user_acc}, opts, {buffer, acc, {:open, level}}, handler) do
do_parse({text, user_acc}, opts, {"", acc <> buffer, {:attrs, level}}, handler)
end
# default cases where state is not important

View file

@ -61,6 +61,24 @@ defmodule AutoLinkerTest do
new_window: false,
rel: custom_rel
) == expected
text = "google.com"
expected = "<a href=\"http://google.com\">google.com</a>"
custom_rel = fn _ -> nil end
assert AutoLinker.link(text,
class: false,
new_window: false,
rel: custom_rel
) == expected
end
test "link_map/2" do
assert AutoLinker.link_map("google.com", []) ==
{"<a href=\"http://google.com\" class=\"auto-linker\" target=\"_blank\" rel=\"noopener noreferrer\">google.com</a>",
[]}
end
describe "custom handlers" do
@ -144,6 +162,22 @@ defmodule AutoLinkerTest do
) == expected
end
test "mentions inside html tags" do
text =
"<p><strong>hello world</strong></p>\n<p><`em>another @user__test and @user__test google.com paragraph</em></p>\n"
expected =
"<p><strong>hello world</strong></p>\n<p><`em>another <a href=\"u/user__test\">@user__test</a> and <a href=\"u/user__test\">@user__test</a> <a href=\"http://google.com\">google.com</a> paragraph</em></p>\n"
assert AutoLinker.link(text,
mention: true,
mention_prefix: "u/",
class: false,
rel: false,
new_window: false
) == expected
end
test "metion @user@example.com" do
text = "hey @user@example.com"

View file

@ -17,6 +17,16 @@ defmodule AutoLinker.BuilderTest do
"<a href=\"http://text\" class=\"auto-linker\" target=\"_blank\" rel=\"me\">text</a>"
assert create_link("text", %{rel: "me"}) == expected
expected = "<a href=\"http://text\" class=\"auto-linker\" target=\"_blank\">t...</a>"
assert create_link("text", %{truncate: 3, rel: false}) == expected
expected = "<a href=\"http://text\" class=\"auto-linker\" target=\"_blank\">text</a>"
assert create_link("text", %{truncate: 2, rel: false}) == expected
expected = "<a href=\"http://text\" class=\"auto-linker\" target=\"_blank\">http://text</a>"
assert create_link("http://text", %{rel: false, strip_prefix: false}) == expected
end
test "create_markdown_links/2" do
@ -52,9 +62,9 @@ defmodule AutoLinker.BuilderTest do
phrase = "my exten is x888. Call me."
expected =
~s'my exten is <a href="#" class="phone-number" data-phone="888">x888</a>. Call me.'
~s'my exten is <a href="#" class="phone-number" data-phone="888" test=\"test\">x888</a>. Call me.'
assert create_phone_link([["x888", ""]], phrase, []) == expected
assert create_phone_link([["x888", ""]], phrase, attributes: [test: "test"]) == expected
end
test "handles multiple links" do

View file

@ -69,7 +69,24 @@ defmodule AutoLinker.ParserTest do
assert parse(text) == text
end
test "does not link inside `<pre>` and `<code>`" do
text = "<pre>google.com</pre>"
assert parse(text) == text
text = "<code>google.com</code>"
assert parse(text) == text
text = "<pre><code>google.com</code></pre>"
assert parse(text) == text
end
test "links url inside html" do
text = "<div>google.com</div>"
expected = "<div><a href=\"http://google.com\">google.com</a></div>"
assert parse(text, class: false, rel: false, new_window: false, phone: false) == expected
text = "Check out <div class='section'>google.com</div>"
expected =
@ -78,10 +95,21 @@ defmodule AutoLinker.ParserTest do
assert parse(text, class: false, rel: false, new_window: false) == expected
end
test "links url inside nested html" do
text = "<p><strong>google.com</strong></p>"
expected = "<p><strong><a href=\"http://google.com\">google.com</a></strong></p>"
assert parse(text, class: false, rel: false, new_window: false) == expected
end
test "excludes html with specified class" do
text = "```Check out <div class='section'>google.com</div>```"
assert parse(text, exclude_patterns: ["```"]) == text
end
test "do not link urls" do
text = "google.com"
assert parse(text, url: false, phone: true) == text
end
end
def valid_number?([list], number) do