Merge branch 'dev-lanodan-url-regex' into 'develop'

lib/pleroma/formatter.ex: Fix URL regex

Closes #127

See merge request pleroma/pleroma!69
This commit is contained in:
kaniini 2018-08-16 15:41:16 +00:00
commit 183ccd1812
3 changed files with 42 additions and 6 deletions

View file

@ -16,6 +16,8 @@
config :pleroma, :emoji, shortcode_globs: ["/emoji/custom/**/*.png"] config :pleroma, :emoji, shortcode_globs: ["/emoji/custom/**/*.png"]
config :pleroma, :uri_schemes, additionnal_schemes: []
# Configures the endpoint # Configures the endpoint
config :pleroma, Pleroma.Web.Endpoint, config :pleroma, Pleroma.Web.Endpoint,
url: [host: "localhost"], url: [host: "localhost"],

View file

@ -165,8 +165,29 @@ def get_custom_emoji() do
@emoji @emoji
end end
@link_regex ~r/https?:\/\/[\w\.\/?=\-#\+%&@~'\(\):]+[\w\/]/u @link_regex ~r/[0-9a-z+\-\.]+:[0-9a-z$-_.+!*'(),]+/ui
# IANA got a list https://www.iana.org/assignments/uri-schemes/ but
# Stuff like ipfs isnt in it
# There is very niche stuff
@uri_schemes [
"https://",
"http://",
"dat://",
"dweb://",
"gopher://",
"ipfs://",
"ipns://",
"irc:",
"ircs:",
"magnet:",
"mailto:",
"mumble:",
"ssb://",
"xmpp:"
]
# TODO: make it use something other than @link_regex
def html_escape(text) do def html_escape(text) do
Regex.split(@link_regex, text, include_captures: true) Regex.split(@link_regex, text, include_captures: true)
|> Enum.map_every(2, fn chunk -> |> Enum.map_every(2, fn chunk ->
@ -176,11 +197,18 @@ def html_escape(text) do
|> Enum.join("") |> Enum.join("")
end end
@doc "changes http:... links to html links" @doc "changes scheme:... urls to html links"
def add_links({subs, text}) do def add_links({subs, text}) do
additionnal_schemes =
Application.get_env(:pleroma, :uri_schemes, [])
|> Keyword.get(:additionnal_schemes, [])
links = links =
Regex.scan(@link_regex, text) text
|> Enum.map(fn [url] -> {Ecto.UUID.generate(), url} end) |> String.split([" ", "\t", "<br>"])
|> Enum.filter(fn word -> String.starts_with?(word, @uri_schemes ++ additionnal_schemes) end)
|> Enum.filter(fn word -> Regex.match?(@link_regex, word) end)
|> Enum.map(fn url -> {Ecto.UUID.generate(), url} end)
|> Enum.sort_by(fn {_, url} -> -String.length(url) end) |> Enum.sort_by(fn {_, url} -> -String.length(url) end)
uuid_text = uuid_text =

View file

@ -20,10 +20,10 @@ test "turns hashtags into links" do
describe ".add_links" do describe ".add_links" do
test "turning urls into links" do test "turning urls into links" do
text = "Hey, check out https://www.youtube.com/watch?v=8Zg1-TufF%20zY?x=1&y=2#blabla." text = "Hey, check out https://www.youtube.com/watch?v=8Zg1-TufF%20zY?x=1&y=2#blabla ."
expected = expected =
"Hey, check out <a href=\"https://www.youtube.com/watch?v=8Zg1-TufF%20zY?x=1&amp;y=2#blabla\">https://www.youtube.com/watch?v=8Zg1-TufF%20zY?x=1&amp;y=2#blabla</a>." "Hey, check out <a href=\"https://www.youtube.com/watch?v=8Zg1-TufF%20zY?x=1&amp;y=2#blabla\">https://www.youtube.com/watch?v=8Zg1-TufF%20zY?x=1&amp;y=2#blabla</a> ."
assert Formatter.add_links({[], text}) |> Formatter.finalize() == expected assert Formatter.add_links({[], text}) |> Formatter.finalize() == expected
@ -85,6 +85,12 @@ test "turning urls into links" do
"<a href=\"https://pleroma.com\">https://pleroma.com</a> <a href=\"https://pleroma.com/sucks\">https://pleroma.com/sucks</a>" "<a href=\"https://pleroma.com\">https://pleroma.com</a> <a href=\"https://pleroma.com/sucks\">https://pleroma.com/sucks</a>"
assert Formatter.add_links({[], text}) |> Formatter.finalize() == expected assert Formatter.add_links({[], text}) |> Formatter.finalize() == expected
text = "xmpp:contact@hacktivis.me"
expected = "<a href=\"xmpp:contact@hacktivis.me\">xmpp:contact@hacktivis.me</a>"
assert Formatter.add_links({[], text}) |> Formatter.finalize() == expected
end end
end end