From 4ad15ad2a90ca1ac370c8a79f796adc603a90479 Mon Sep 17 00:00:00 2001 From: Maksim Pechnikov Date: Tue, 25 Jun 2019 22:25:37 +0300 Subject: [PATCH] add ignore hosts and TLDs for rich_media --- config/config.exs | 5 ++- config/test.exs | 6 ++- docs/config.md | 2 + lib/pleroma/web/rich_media/helpers.ex | 40 ++++++++++++++----- test/support/http_request_mock.ex | 16 ++++++++ .../mastodon_api_controller_test.exs | 9 +++-- test/web/rich_media/helpers_test.exs | 10 +++-- 7 files changed, 67 insertions(+), 21 deletions(-) diff --git a/config/config.exs b/config/config.exs index 0d07fc692..5032b24e6 100644 --- a/config/config.exs +++ b/config/config.exs @@ -330,7 +330,10 @@ config :pleroma, :mrf_subchain, match_actor: %{} -config :pleroma, :rich_media, enabled: true +config :pleroma, :rich_media, + enabled: true, + ignore_hosts: [], + ignore_tld: ["local", "localdomain", "lan"] config :pleroma, :media_proxy, enabled: false, diff --git a/config/test.exs b/config/test.exs index 73a8b82a1..9d441a7f5 100644 --- a/config/test.exs +++ b/config/test.exs @@ -43,7 +43,11 @@ config :pbkdf2_elixir, rounds: 1 config :tesla, adapter: Tesla.Mock -config :pleroma, :rich_media, enabled: false + +config :pleroma, :rich_media, + enabled: false, + ignore_hosts: [], + ignore_tld: ["local", "localdomain", "lan"] config :web_push_encryption, :vapid_details, subject: "mailto:administrator@example.com", diff --git a/docs/config.md b/docs/config.md index b08c37e84..8c98f5c05 100644 --- a/docs/config.md +++ b/docs/config.md @@ -417,6 +417,8 @@ This config contains two queues: `federator_incoming` and `federator_outgoing`. ## :rich_media * `enabled`: if enabled the instance will parse metadata from attached links to generate link previews +* `ignore_hosts`: list host which will ignore for parse metadata. default is []. +* `ignore_tld`: list TLDs (top-level domains) which will ignore for parse metadata. default is ["local", "localdomain", "lan"] ## :fetch_initial_posts * `enabled`: if enabled, when a new user is federated with, fetch some of their latest posts diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex index 473ff800f..4ece3e846 100644 --- a/lib/pleroma/web/rich_media/helpers.ex +++ b/lib/pleroma/web/rich_media/helpers.ex @@ -4,35 +4,53 @@ defmodule Pleroma.Web.RichMedia.Helpers do alias Pleroma.Activity + alias Pleroma.Config alias Pleroma.HTML alias Pleroma.Object alias Pleroma.Web.RichMedia.Parser - @private_ip_regexp ~r/(127\.)|(10\.\d+\.\d+.\d+)|(192\.168\.) - |(^172\.1[6-9]\.)|(^172\.2[0-9]\.)|(^172\.3[0-1]\.)|(localhost)/ + @validate_tld Application.get_env(:auto_linker, :opts)[:validate_tld] + @spec validate_page_url(any()) :: :ok | :error defp validate_page_url(page_url) when is_binary(page_url) do - validate_tld = Application.get_env(:auto_linker, :opts)[:validate_tld] + page_url + |> AutoLinker.Parser.url?(scheme: true, validate_tld: @validate_tld) + |> parse_uri(page_url) + end + defp validate_page_url(%URI{host: host, scheme: scheme, authority: authority}) + when scheme == "https" and not is_nil(authority) do cond do - Regex.match?(@private_ip_regexp, page_url) -> + host in Config.get([:rich_media, :ignore_hosts], []) -> :error - AutoLinker.Parser.url?(page_url, scheme: true, validate_tld: validate_tld) -> - URI.parse(page_url) |> validate_page_url + get_tld(host) in Config.get([:rich_media, :ignore_tld], []) -> + :error true -> - :error + :ok end end - defp validate_page_url(%URI{authority: nil}), do: :error - defp validate_page_url(%URI{scheme: nil}), do: :error - defp validate_page_url(%URI{}), do: :ok defp validate_page_url(_), do: :error + defp parse_uri(true, url) do + url + |> URI.parse() + |> validate_page_url + end + + defp parse_uri(_, _), do: :error + + defp get_tld(host) do + host + |> String.split(".") + |> Enum.reverse() + |> hd + end + def fetch_data_for_activity(%Activity{data: %{"type" => "Create"}} = activity) do - with true <- Pleroma.Config.get([:rich_media, :enabled]), + with true <- Config.get([:rich_media, :enabled]), %Object{} = object <- Object.normalize(activity), false <- object.data["sensitive"] || false, {:ok, page_url} <- HTML.extract_first_external_url(object, object.data["content"]), diff --git a/test/support/http_request_mock.ex b/test/support/http_request_mock.ex index f7f55a11a..30169edb0 100644 --- a/test/support/http_request_mock.ex +++ b/test/support/http_request_mock.ex @@ -757,6 +757,14 @@ def get("http://example.com/ogp", _, _, _) do {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}} end + def get("https://example.com/ogp", _, _, _) do + {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}} + end + + def get("https://pleroma.local/notice/9kCP7V", _, _, _) do + {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}} + end + def get("http://example.com/ogp-missing-data", _, _, _) do {:ok, %Tesla.Env{ @@ -765,6 +773,14 @@ def get("http://example.com/ogp-missing-data", _, _, _) do }} end + def get("https://example.com/ogp-missing-data", _, _, _) do + {:ok, + %Tesla.Env{ + status: 200, + body: File.read!("test/fixtures/rich_media/ogp-missing-data.html") + }} + end + def get("http://example.com/malformed", _, _, _) do {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/malformed-data.html")}} diff --git a/test/web/mastodon_api/mastodon_api_controller_test.exs b/test/web/mastodon_api/mastodon_api_controller_test.exs index 707723421..17e723528 100644 --- a/test/web/mastodon_api/mastodon_api_controller_test.exs +++ b/test/web/mastodon_api/mastodon_api_controller_test.exs @@ -312,7 +312,7 @@ test "posting a status with OGP link preview", %{conn: conn} do conn |> assign(:user, user) |> post("/api/v1/statuses", %{ - "status" => "http://example.com/ogp" + "status" => "https://example.com/ogp" }) assert %{"id" => id, "card" => %{"title" => "The Rock"}} = json_response(conn, 200) @@ -2557,7 +2557,7 @@ test "max pinned statuses", %{conn: conn, user: user, activity: activity_one} do end test "returns rich-media card", %{conn: conn, user: user} do - {:ok, activity} = CommonAPI.post(user, %{"status" => "http://example.com/ogp"}) + {:ok, activity} = CommonAPI.post(user, %{"status" => "https://example.com/ogp"}) card_data = %{ "image" => "http://ia.media-imdb.com/images/rock.jpg", @@ -2589,7 +2589,7 @@ test "returns rich-media card", %{conn: conn, user: user} do # works with private posts {:ok, activity} = - CommonAPI.post(user, %{"status" => "http://example.com/ogp", "visibility" => "direct"}) + CommonAPI.post(user, %{"status" => "https://example.com/ogp", "visibility" => "direct"}) response_two = conn @@ -2601,7 +2601,8 @@ test "returns rich-media card", %{conn: conn, user: user} do end test "replaces missing description with an empty string", %{conn: conn, user: user} do - {:ok, activity} = CommonAPI.post(user, %{"status" => "http://example.com/ogp-missing-data"}) + {:ok, activity} = + CommonAPI.post(user, %{"status" => "https://example.com/ogp-missing-data"}) response = conn diff --git a/test/web/rich_media/helpers_test.exs b/test/web/rich_media/helpers_test.exs index 1823d9af5..c8f442b05 100644 --- a/test/web/rich_media/helpers_test.exs +++ b/test/web/rich_media/helpers_test.exs @@ -50,13 +50,13 @@ test "crawls valid, complete URLs" do {:ok, activity} = CommonAPI.post(user, %{ - "status" => "[test](http://example.com/ogp)", + "status" => "[test](https://example.com/ogp)", "content_type" => "text/markdown" }) Config.put([:rich_media, :enabled], true) - assert %{page_url: "http://example.com/ogp", rich_media: _} = + assert %{page_url: "https://example.com/ogp", rich_media: _} = Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end @@ -97,19 +97,21 @@ test "refuses to crawl URLs from posts tagged NSFW" do test "refuses to crawl URLs of private network from posts" do user = insert(:user) - Config.put([:rich_media, :enabled], true) {:ok, activity} = CommonAPI.post(user, %{"status" => "http://127.0.0.1:4000/notice/9kCP7VNyPJXFOXDrgO"}) {:ok, activity2} = CommonAPI.post(user, %{"status" => "https://10.111.10.1/notice/9kCP7V"}) - {:ok, activity3} = CommonAPI.post(user, %{"status" => "https://172.16.32.40/notice/9kCP7V"}) {:ok, activity4} = CommonAPI.post(user, %{"status" => "https://192.168.10.40/notice/9kCP7V"}) + {:ok, activity5} = CommonAPI.post(user, %{"status" => "https://pleroma.local/notice/9kCP7V"}) + + Config.put([:rich_media, :enabled], true) assert %{} = Helpers.fetch_data_for_activity(activity) assert %{} = Helpers.fetch_data_for_activity(activity2) assert %{} = Helpers.fetch_data_for_activity(activity3) assert %{} = Helpers.fetch_data_for_activity(activity4) + assert %{} = Helpers.fetch_data_for_activity(activity5) end end