Merge branch 'improve-parsing' into 'master'
Improve Parser. See merge request pleroma/auto_linker!10
This commit is contained in:
commit
a3ec8eb797
5 changed files with 114 additions and 42 deletions
|
@ -82,7 +82,7 @@ defmodule AutoLinker.Builder do
|
|||
# Shortens the display text of a link.
# First clause: a `len` below 3 returns `url` unchanged.
defp truncate(url, len) when len < 3, do: url
|
||||
|
||||
# Second clause: when `url` is longer than `len`, keep its first `len - 2`
# characters and append a dot suffix; otherwise return `url` unchanged.
# NOTE(review): this diff view shows two versions of the body line; the hunk
# header (7 lines before and after) suggests the ".." line is the removed one
# and the "..." line is its replacement.
defp truncate(url, len) do
|
||||
if String.length(url) > len, do: String.slice(url, 0, len - 2) <> "..", else: url
|
||||
if String.length(url) > len, do: String.slice(url, 0, len - 2) <> "...", else: url
|
||||
end
|
||||
|
||||
defp strip_prefix(url, true) do
|
||||
|
|
|
@ -5,31 +5,11 @@ defmodule AutoLinker.Parser do
|
|||
|
||||
alias AutoLinker.Builder
|
||||
|
||||
@doc """
|
||||
Parse the given string, identifying items to link.
|
||||
|
||||
Parses the string, replacing the matching urls and phone numbers with an html link.
|
||||
|
||||
## Examples
|
||||
|
||||
iex> AutoLinker.Parser.parse("Check out google.com")
|
||||
~s{Check out <a href="http://google.com" class="auto-linker" target="_blank" rel="noopener noreferrer">google.com</a>}
|
||||
|
||||
iex> AutoLinker.Parser.parse("call me at x9999", phone: true)
|
||||
~s{call me at <a href="#" class="phone-number" data-phone="9999">x9999</a>}
|
||||
|
||||
iex> AutoLinker.Parser.parse("or at home on 555.555.5555", phone: true)
|
||||
~s{or at home on <a href="#" class="phone-number" data-phone="5555555555">555.555.5555</a>}
|
||||
|
||||
iex> AutoLinker.Parser.parse(", work (555) 555-5555", phone: true)
|
||||
~s{, work <a href="#" class="phone-number" data-phone="5555555555">(555) 555-5555</a>}
|
||||
"""
|
||||
|
||||
@invalid_url ~r/(\.\.+)|(^(\d+\.){1,2}\d+$)/
|
||||
|
||||
@match_url ~r{^[\w\.-]+(?:\.[\w\.-]+)+[\w\-\._~%:/?#[\]@!\$&'\(\)\*\+,;=.]+$}
|
||||
|
||||
@match_scheme ~r{^(?:\W*)?(?<url>(?:\W*https?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~%:\/?#[\]@!\$&'\(\)\*\+,;=.]+$)}u
|
||||
@match_scheme ~r{^(?:\W*)?(?<url>(?:https?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~%:\/?#[\]@!\$&'\(\)\*\+,;=.]+$)}u
|
||||
|
||||
@match_phone ~r"((?:x\d{2,7})|(?:(?:\+?1\s?(?:[.-]\s?)?)?(?:\(\s?(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s?\)|(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s?(?:[.-]\s?)?)(?:[2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s?(?:[.-]\s?)?(?:[0-9]{4}))"
|
||||
|
||||
|
@ -64,6 +44,26 @@ defmodule AutoLinker.Parser do
|
|||
|
||||
@default_opts ~w(url)a
|
||||
|
||||
@doc """
|
||||
Parse the given string, identifying items to link.
|
||||
|
||||
Parses the string, replacing the matching urls and phone numbers with an html link.
|
||||
|
||||
## Examples
|
||||
|
||||
iex> AutoLinker.Parser.parse("Check out google.com")
|
||||
~s{Check out <a href="http://google.com" class="auto-linker" target="_blank" rel="noopener noreferrer">google.com</a>}
|
||||
|
||||
iex> AutoLinker.Parser.parse("call me at x9999", phone: true)
|
||||
~s{call me at <a href="#" class="phone-number" data-phone="9999">x9999</a>}
|
||||
|
||||
iex> AutoLinker.Parser.parse("or at home on 555.555.5555", phone: true)
|
||||
~s{or at home on <a href="#" class="phone-number" data-phone="5555555555">555.555.5555</a>}
|
||||
|
||||
iex> AutoLinker.Parser.parse(", work (555) 555-5555", phone: true)
|
||||
~s{, work <a href="#" class="phone-number" data-phone="5555555555">(555) 555-5555</a>}
|
||||
"""
|
||||
|
||||
def parse(input, opts \\ %{})
|
||||
def parse(input, opts) when is_binary(input), do: {input, nil} |> parse(opts) |> elem(0)
|
||||
def parse(input, list) when is_list(list), do: parse(input, Enum.into(list, %{}))
|
||||
|
@ -154,20 +154,31 @@ defmodule AutoLinker.Parser do
|
|||
# End of input with an empty buffer: return the accumulated output together
# with the user accumulator as `{acc, user_acc}`.
defp do_parse({"", user_acc}, _opts, {"", acc, _}, _handler),
|
||||
do: {acc, user_acc}
|
||||
|
||||
# End of input with text still buffered: pass the buffer through
# `run_handler/4` one last time and append its result to the output.
defp do_parse({"", user_acc}, opts, {buffer, acc, _}, handler) do
|
||||
{buffer, user_acc} = run_handler(handler, buffer, opts, user_acc)
|
||||
{acc <> buffer, user_acc}
|
||||
end
|
||||
|
||||
# Opening `<a`, `<pre` or `<code` while in the `:parsing` state: append the
# buffer and the tag prefix to the output without running the handler, then
# switch to `:skip`.
# NOTE(review): the patterns are plain binary prefixes, so `"<a"` also matches
# any other input that starts with `<a`.
defp do_parse({"<a" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler),
|
||||
do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<a", :skip}, handler)
|
||||
|
||||
defp do_parse({"<pre" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler),
|
||||
do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<pre", :skip}, handler)
|
||||
|
||||
defp do_parse({"<code" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler),
|
||||
do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<code", :skip}, handler)
|
||||
|
||||
# Closing `</a>`, `</pre>` or `</code>` while in `:skip`: append the buffer and
# the closing tag to the output and return to `:parsing`.
# NOTE(review): `:skip` does not record which tag opened it, so any of these
# three closing tags ends the skipped region.
defp do_parse({"</a>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler),
|
||||
do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</a>", :parsing}, handler)
|
||||
|
||||
defp do_parse({"</pre>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler),
|
||||
do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</pre>", :parsing}, handler)
|
||||
|
||||
defp do_parse({"</code>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler),
|
||||
do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</code>", :parsing}, handler)
|
||||
|
||||
# A `<` seen in the `:parsing` state with an empty buffer: put the `<` in the
# buffer and enter `{:open, 1}`.
defp do_parse({"<" <> text, user_acc}, opts, {"", acc, :parsing}, handler),
|
||||
do: do_parse({text, user_acc}, opts, {"<", acc, {:open, 1}}, handler)
|
||||
|
||||
# A `<` seen in the `{:html, level}` state with an empty buffer: put the `<` in
# the buffer and enter `{:open, level + 1}`.
defp do_parse({"<" <> text, user_acc}, opts, {"", acc, {:html, level}}, handler) do
|
||||
do_parse({text, user_acc}, opts, {"<", acc, {:open, level + 1}}, handler)
|
||||
end
|
||||
|
||||
defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:attrs, level}}, handler),
|
||||
do:
|
||||
do_parse(
|
||||
|
@ -204,19 +215,8 @@ defmodule AutoLinker.Parser do
|
|||
handler
|
||||
)
|
||||
|
||||
defp do_parse(
|
||||
{<<char::bytes-size(1), text::binary>>, user_acc},
|
||||
opts,
|
||||
{buffer, acc, {:open, level}},
|
||||
handler
|
||||
)
|
||||
when char in [" ", "\r", "\n"] do
|
||||
do_parse(
|
||||
{text, user_acc},
|
||||
opts,
|
||||
{"", acc <> buffer <> char, {:attrs, level}},
|
||||
handler
|
||||
)
|
||||
# Any input while in `{:open, level}`: append the buffer to the output and move
# to `{:attrs, level}`. `text` is passed on unchanged, so no input is consumed
# by this clause.
defp do_parse({text, user_acc}, opts, {buffer, acc, {:open, level}}, handler) do
|
||||
do_parse({text, user_acc}, opts, {"", acc <> buffer, {:attrs, level}}, handler)
|
||||
end
|
||||
|
||||
# default cases where state is not important
|
||||
|
|
|
@ -61,6 +61,24 @@ defmodule AutoLinkerTest do
|
|||
new_window: false,
|
||||
rel: custom_rel
|
||||
) == expected
|
||||
|
||||
text = "google.com"
|
||||
|
||||
expected = "<a href=\"http://google.com\">google.com</a>"
|
||||
|
||||
custom_rel = fn _ -> nil end
|
||||
|
||||
assert AutoLinker.link(text,
|
||||
class: false,
|
||||
new_window: false,
|
||||
rel: custom_rel
|
||||
) == expected
|
||||
end
|
||||
|
||||
# Asserts that `AutoLinker.link_map/2` returns a two-element tuple: the linked
# HTML and the `[]` that was passed as the second argument.
test "link_map/2" do
|
||||
assert AutoLinker.link_map("google.com", []) ==
|
||||
{"<a href=\"http://google.com\" class=\"auto-linker\" target=\"_blank\" rel=\"noopener noreferrer\">google.com</a>",
|
||||
[]}
|
||||
end
|
||||
|
||||
describe "custom handlers" do
|
||||
|
@ -144,6 +162,22 @@ defmodule AutoLinkerTest do
|
|||
) == expected
|
||||
end
|
||||
|
||||
# Asserts that both mentions and a bare domain inside nested HTML are linked.
# The expected mention href is the `mention_prefix` ("u/") followed by the user
# name; class, rel and target attributes are switched off to keep the expected
# markup minimal.
test "mentions inside html tags" do
|
||||
text =
|
||||
"<p><strong>hello world</strong></p>\n<p><`em>another @user__test and @user__test google.com paragraph</em></p>\n"
|
||||
|
||||
expected =
|
||||
"<p><strong>hello world</strong></p>\n<p><`em>another <a href=\"u/user__test\">@user__test</a> and <a href=\"u/user__test\">@user__test</a> <a href=\"http://google.com\">google.com</a> paragraph</em></p>\n"
|
||||
|
||||
assert AutoLinker.link(text,
|
||||
mention: true,
|
||||
|
||||
mention_prefix: "u/",
|
||||
class: false,
|
||||
rel: false,
|
||||
new_window: false
|
||||
) == expected
|
||||
end
|
||||
|
||||
test "metion @user@example.com" do
|
||||
text = "hey @user@example.com"
|
||||
|
||||
|
|
|
@ -17,6 +17,16 @@ defmodule AutoLinker.BuilderTest do
|
|||
"<a href=\"http://text\" class=\"auto-linker\" target=\"_blank\" rel=\"me\">text</a>"
|
||||
|
||||
assert create_link("text", %{rel: "me"}) == expected
|
||||
|
||||
expected = "<a href=\"http://text\" class=\"auto-linker\" target=\"_blank\">t...</a>"
|
||||
|
||||
assert create_link("text", %{truncate: 3, rel: false}) == expected
|
||||
|
||||
expected = "<a href=\"http://text\" class=\"auto-linker\" target=\"_blank\">text</a>"
|
||||
assert create_link("text", %{truncate: 2, rel: false}) == expected
|
||||
|
||||
expected = "<a href=\"http://text\" class=\"auto-linker\" target=\"_blank\">http://text</a>"
|
||||
assert create_link("http://text", %{rel: false, strip_prefix: false}) == expected
|
||||
end
|
||||
|
||||
test "create_markdown_links/2" do
|
||||
|
@ -52,9 +62,9 @@ defmodule AutoLinker.BuilderTest do
|
|||
phrase = "my exten is x888. Call me."
|
||||
|
||||
expected =
|
||||
~s'my exten is <a href="#" class="phone-number" data-phone="888">x888</a>. Call me.'
|
||||
~s'my exten is <a href="#" class="phone-number" data-phone="888" test=\"test\">x888</a>. Call me.'
|
||||
|
||||
assert create_phone_link([["x888", ""]], phrase, []) == expected
|
||||
assert create_phone_link([["x888", ""]], phrase, attributes: [test: "test"]) == expected
|
||||
end
|
||||
|
||||
test "handles multiple links" do
|
||||
|
|
|
@ -69,7 +69,24 @@ defmodule AutoLinker.ParserTest do
|
|||
assert parse(text) == text
|
||||
end
|
||||
|
||||
# Asserts that a bare domain inside `<pre>`, `<code>`, or `<code>` nested in
# `<pre>` comes back from `parse/1` unchanged.
test "does not link inside `<pre>` and `<code>`" do
|
||||
text = "<pre>google.com</pre>"
|
||||
assert parse(text) == text
|
||||
|
||||
text = "<code>google.com</code>"
|
||||
assert parse(text) == text
|
||||
|
||||
text = "<pre><code>google.com</code></pre>"
|
||||
assert parse(text) == text
|
||||
end
|
||||
|
||||
test "links url inside html" do
|
||||
text = "<div>google.com</div>"
|
||||
|
||||
expected = "<div><a href=\"http://google.com\">google.com</a></div>"
|
||||
|
||||
assert parse(text, class: false, rel: false, new_window: false, phone: false) == expected
|
||||
|
||||
text = "Check out <div class='section'>google.com</div>"
|
||||
|
||||
expected =
|
||||
|
@ -78,10 +95,21 @@ defmodule AutoLinker.ParserTest do
|
|||
assert parse(text, class: false, rel: false, new_window: false) == expected
|
||||
end
|
||||
|
||||
# Asserts that a bare domain two tag levels deep (`<p><strong>…`) is wrapped in
# a link while the surrounding tags are preserved.
test "links url inside nested html" do
|
||||
text = "<p><strong>google.com</strong></p>"
|
||||
expected = "<p><strong><a href=\"http://google.com\">google.com</a></strong></p>"
|
||||
assert parse(text, class: false, rel: false, new_window: false) == expected
|
||||
end
|
||||
|
||||
# Asserts that input wrapped in the "```" exclude pattern comes back unchanged.
# NOTE(review): the test name mentions a "specified class", but the option
# exercised here is `exclude_patterns`.
test "excludes html with specified class" do
|
||||
text = "```Check out <div class='section'>google.com</div>```"
|
||||
assert parse(text, exclude_patterns: ["```"]) == text
|
||||
end
|
||||
|
||||
# Asserts that with `url: false` a bare domain comes back unchanged, even with
# phone linking enabled.
test "do not link urls" do
|
||||
text = "google.com"
|
||||
assert parse(text, url: false, phone: true) == text
|
||||
end
|
||||
end
|
||||
|
||||
def valid_number?([list], number) do
|
||||
|
|
Reference in a new issue