add ignore hosts and TLDs for rich_media

This commit is contained in:
Maksim Pechnikov 2019-06-25 22:25:37 +03:00
parent 0cb8e710fb
commit 4ad15ad2a9
7 changed files with 67 additions and 21 deletions
config
docs
lib/pleroma/web/rich_media
test

View file

@ -330,7 +330,10 @@
config :pleroma, :mrf_subchain, match_actor: %{}
config :pleroma, :rich_media, enabled: true
config :pleroma, :rich_media,
enabled: true,
ignore_hosts: [],
ignore_tld: ["local", "localdomain", "lan"]
config :pleroma, :media_proxy,
enabled: false,

View file

@ -43,7 +43,11 @@
config :pbkdf2_elixir, rounds: 1
config :tesla, adapter: Tesla.Mock
config :pleroma, :rich_media, enabled: false
config :pleroma, :rich_media,
enabled: false,
ignore_hosts: [],
ignore_tld: ["local", "localdomain", "lan"]
config :web_push_encryption, :vapid_details,
subject: "mailto:administrator@example.com",

View file

@ -417,6 +417,8 @@ This config contains two queues: `federator_incoming` and `federator_outgoing`.
## :rich_media
* `enabled`: if enabled the instance will parse metadata from attached links to generate link previews
* `ignore_hosts`: list host which will ignore for parse metadata. default is [].
* `ignore_tld`: list TLDs (top-level domains) which will ignore for parse metadata. default is ["local", "localdomain", "lan"]
## :fetch_initial_posts
* `enabled`: if enabled, when a new user is federated with, fetch some of their latest posts

View file

@ -4,35 +4,53 @@
defmodule Pleroma.Web.RichMedia.Helpers do
alias Pleroma.Activity
alias Pleroma.Config
alias Pleroma.HTML
alias Pleroma.Object
alias Pleroma.Web.RichMedia.Parser
@private_ip_regexp ~r/(127\.)|(10\.\d+\.\d+.\d+)|(192\.168\.)
|(^172\.1[6-9]\.)|(^172\.2[0-9]\.)|(^172\.3[0-1]\.)|(localhost)/
@validate_tld Application.get_env(:auto_linker, :opts)[:validate_tld]
@spec validate_page_url(any()) :: :ok | :error
defp validate_page_url(page_url) when is_binary(page_url) do
validate_tld = Application.get_env(:auto_linker, :opts)[:validate_tld]
page_url
|> AutoLinker.Parser.url?(scheme: true, validate_tld: @validate_tld)
|> parse_uri(page_url)
end
defp validate_page_url(%URI{host: host, scheme: scheme, authority: authority})
when scheme == "https" and not is_nil(authority) do
cond do
Regex.match?(@private_ip_regexp, page_url) ->
host in Config.get([:rich_media, :ignore_hosts], []) ->
:error
AutoLinker.Parser.url?(page_url, scheme: true, validate_tld: validate_tld) ->
URI.parse(page_url) |> validate_page_url
get_tld(host) in Config.get([:rich_media, :ignore_tld], []) ->
:error
true ->
:error
:ok
end
end
defp validate_page_url(%URI{authority: nil}), do: :error
defp validate_page_url(%URI{scheme: nil}), do: :error
defp validate_page_url(%URI{}), do: :ok
defp validate_page_url(_), do: :error
defp parse_uri(true, url) do
url
|> URI.parse()
|> validate_page_url
end
defp parse_uri(_, _), do: :error
defp get_tld(host) do
host
|> String.split(".")
|> Enum.reverse()
|> hd
end
def fetch_data_for_activity(%Activity{data: %{"type" => "Create"}} = activity) do
with true <- Pleroma.Config.get([:rich_media, :enabled]),
with true <- Config.get([:rich_media, :enabled]),
%Object{} = object <- Object.normalize(activity),
false <- object.data["sensitive"] || false,
{:ok, page_url} <- HTML.extract_first_external_url(object, object.data["content"]),

View file

@ -757,6 +757,14 @@ def get("http://example.com/ogp", _, _, _) do
{:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}}
end
def get("https://example.com/ogp", _, _, _) do
{:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}}
end
def get("https://pleroma.local/notice/9kCP7V", _, _, _) do
{:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}}
end
def get("http://example.com/ogp-missing-data", _, _, _) do
{:ok,
%Tesla.Env{
@ -765,6 +773,14 @@ def get("http://example.com/ogp-missing-data", _, _, _) do
}}
end
def get("https://example.com/ogp-missing-data", _, _, _) do
{:ok,
%Tesla.Env{
status: 200,
body: File.read!("test/fixtures/rich_media/ogp-missing-data.html")
}}
end
def get("http://example.com/malformed", _, _, _) do
{:ok,
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/malformed-data.html")}}

View file

@ -312,7 +312,7 @@ test "posting a status with OGP link preview", %{conn: conn} do
conn
|> assign(:user, user)
|> post("/api/v1/statuses", %{
"status" => "http://example.com/ogp"
"status" => "https://example.com/ogp"
})
assert %{"id" => id, "card" => %{"title" => "The Rock"}} = json_response(conn, 200)
@ -2557,7 +2557,7 @@ test "max pinned statuses", %{conn: conn, user: user, activity: activity_one} do
end
test "returns rich-media card", %{conn: conn, user: user} do
{:ok, activity} = CommonAPI.post(user, %{"status" => "http://example.com/ogp"})
{:ok, activity} = CommonAPI.post(user, %{"status" => "https://example.com/ogp"})
card_data = %{
"image" => "http://ia.media-imdb.com/images/rock.jpg",
@ -2589,7 +2589,7 @@ test "returns rich-media card", %{conn: conn, user: user} do
# works with private posts
{:ok, activity} =
CommonAPI.post(user, %{"status" => "http://example.com/ogp", "visibility" => "direct"})
CommonAPI.post(user, %{"status" => "https://example.com/ogp", "visibility" => "direct"})
response_two =
conn
@ -2601,7 +2601,8 @@ test "returns rich-media card", %{conn: conn, user: user} do
end
test "replaces missing description with an empty string", %{conn: conn, user: user} do
{:ok, activity} = CommonAPI.post(user, %{"status" => "http://example.com/ogp-missing-data"})
{:ok, activity} =
CommonAPI.post(user, %{"status" => "https://example.com/ogp-missing-data"})
response =
conn

View file

@ -50,13 +50,13 @@ test "crawls valid, complete URLs" do
{:ok, activity} =
CommonAPI.post(user, %{
"status" => "[test](http://example.com/ogp)",
"status" => "[test](https://example.com/ogp)",
"content_type" => "text/markdown"
})
Config.put([:rich_media, :enabled], true)
assert %{page_url: "http://example.com/ogp", rich_media: _} =
assert %{page_url: "https://example.com/ogp", rich_media: _} =
Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
end
@ -97,19 +97,21 @@ test "refuses to crawl URLs from posts tagged NSFW" do
test "refuses to crawl URLs of private network from posts" do
user = insert(:user)
Config.put([:rich_media, :enabled], true)
{:ok, activity} =
CommonAPI.post(user, %{"status" => "http://127.0.0.1:4000/notice/9kCP7VNyPJXFOXDrgO"})
{:ok, activity2} = CommonAPI.post(user, %{"status" => "https://10.111.10.1/notice/9kCP7V"})
{:ok, activity3} = CommonAPI.post(user, %{"status" => "https://172.16.32.40/notice/9kCP7V"})
{:ok, activity4} = CommonAPI.post(user, %{"status" => "https://192.168.10.40/notice/9kCP7V"})
{:ok, activity5} = CommonAPI.post(user, %{"status" => "https://pleroma.local/notice/9kCP7V"})
Config.put([:rich_media, :enabled], true)
assert %{} = Helpers.fetch_data_for_activity(activity)
assert %{} = Helpers.fetch_data_for_activity(activity2)
assert %{} = Helpers.fetch_data_for_activity(activity3)
assert %{} = Helpers.fetch_data_for_activity(activity4)
assert %{} = Helpers.fetch_data_for_activity(activity5)
end
end