From 0276cf5a02f555938a7a3e71b6ab24228b1a5fda Mon Sep 17 00:00:00 2001 From: Maksim Pechnikov Date: Tue, 25 Jun 2019 15:52:53 +0300 Subject: [PATCH 1/8] fix validate_url for private ip --- lib/pleroma/web/rich_media/helpers.ex | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex index 94f56f70d..473ff800f 100644 --- a/lib/pleroma/web/rich_media/helpers.ex +++ b/lib/pleroma/web/rich_media/helpers.ex @@ -8,13 +8,21 @@ defmodule Pleroma.Web.RichMedia.Helpers do alias Pleroma.Object alias Pleroma.Web.RichMedia.Parser + @private_ip_regexp ~r/(127\.)|(10\.\d+\.\d+.\d+)|(192\.168\.) + |(^172\.1[6-9]\.)|(^172\.2[0-9]\.)|(^172\.3[0-1]\.)|(localhost)/ + defp validate_page_url(page_url) when is_binary(page_url) do validate_tld = Application.get_env(:auto_linker, :opts)[:validate_tld] - if AutoLinker.Parser.url?(page_url, scheme: true, validate_tld: validate_tld) do - URI.parse(page_url) |> validate_page_url - else - :error + cond do + Regex.match?(@private_ip_regexp, page_url) -> + :error + + AutoLinker.Parser.url?(page_url, scheme: true, validate_tld: validate_tld) -> + URI.parse(page_url) |> validate_page_url + + true -> + :error end end From 0cb8e710fbceb6cc40295f433747e3997d510832 Mon Sep 17 00:00:00 2001 From: Maksim Pechnikov Date: Tue, 25 Jun 2019 17:54:03 +0300 Subject: [PATCH 2/8] add test --- test/web/rich_media/helpers_test.exs | 41 ++++++++++++++++++---------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/test/web/rich_media/helpers_test.exs b/test/web/rich_media/helpers_test.exs index 53b0596f5..1823d9af5 100644 --- a/test/web/rich_media/helpers_test.exs +++ b/test/web/rich_media/helpers_test.exs @@ -1,14 +1,19 @@ defmodule Pleroma.Web.RichMedia.HelpersTest do use Pleroma.DataCase + alias Pleroma.Config alias Pleroma.Object alias Pleroma.Web.CommonAPI + alias Pleroma.Web.RichMedia.Helpers import Pleroma.Factory import Tesla.Mock setup do mock(fn env -> apply(HttpRequestMock, :request, [env]) end) + rich_media = Config.get([:rich_media, :enabled]) + on_exit(fn -> Config.put([:rich_media, :enabled], rich_media) end) + :ok end @@ -21,11 +26,9 @@ test "refuses to crawl incomplete URLs" do "content_type" => "text/markdown" }) - Pleroma.Config.put([:rich_media, :enabled], true) + Config.put([:rich_media, :enabled], true) assert %{} == Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) - - Pleroma.Config.put([:rich_media, :enabled], false) end test "refuses to crawl malformed URLs" do @@ -37,11 +40,9 @@ test "refuses to crawl malformed URLs" do "content_type" => "text/markdown" }) - Pleroma.Config.put([:rich_media, :enabled], true) + Config.put([:rich_media, :enabled], true) assert %{} == Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) - - Pleroma.Config.put([:rich_media, :enabled], false) end test "crawls valid, complete URLs" do @@ -53,12 +54,10 @@ test "crawls valid, complete URLs" do "content_type" => "text/markdown" }) - Pleroma.Config.put([:rich_media, :enabled], true) + Config.put([:rich_media, :enabled], true) assert %{page_url: "http://example.com/ogp", rich_media: _} = Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) - - Pleroma.Config.put([:rich_media, :enabled], false) end test "refuses to crawl URLs from posts marked sensitive" do @@ -74,11 +73,9 @@ test "refuses to crawl URLs from posts marked sensitive" do assert object.data["sensitive"] - Pleroma.Config.put([:rich_media, :enabled], true) + Config.put([:rich_media, :enabled], true) assert %{} = Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) - - Pleroma.Config.put([:rich_media, :enabled], false) end test "refuses to crawl URLs from posts tagged NSFW" do @@ -93,10 +90,26 @@ test "refuses to crawl URLs from posts tagged NSFW" do assert object.data["sensitive"] - Pleroma.Config.put([:rich_media, :enabled], true) + Config.put([:rich_media, :enabled], true) assert %{} = Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) + end - Pleroma.Config.put([:rich_media, :enabled], false) + test "refuses to crawl URLs of private network from posts" do + user = insert(:user) + Config.put([:rich_media, :enabled], true) + + {:ok, activity} = + CommonAPI.post(user, %{"status" => "http://127.0.0.1:4000/notice/9kCP7VNyPJXFOXDrgO"}) + + {:ok, activity2} = CommonAPI.post(user, %{"status" => "https://10.111.10.1/notice/9kCP7V"}) + + {:ok, activity3} = CommonAPI.post(user, %{"status" => "https://172.16.32.40/notice/9kCP7V"}) + {:ok, activity4} = CommonAPI.post(user, %{"status" => "https://192.168.10.40/notice/9kCP7V"}) + + assert %{} = Helpers.fetch_data_for_activity(activity) + assert %{} = Helpers.fetch_data_for_activity(activity2) + assert %{} = Helpers.fetch_data_for_activity(activity3) + assert %{} = Helpers.fetch_data_for_activity(activity4) end end From 4ad15ad2a90ca1ac370c8a79f796adc603a90479 Mon Sep 17 00:00:00 2001 From: Maksim Pechnikov Date: Tue, 25 Jun 2019 22:25:37 +0300 Subject: [PATCH 3/8] add ignore hosts and TLDs for rich_media --- config/config.exs | 5 ++- config/test.exs | 6 ++- docs/config.md | 2 + lib/pleroma/web/rich_media/helpers.ex | 40 ++++++++++++++----- test/support/http_request_mock.ex | 16 ++++++++ .../mastodon_api_controller_test.exs | 9 +++-- test/web/rich_media/helpers_test.exs | 10 +++-- 7 files changed, 67 insertions(+), 21 deletions(-) diff --git a/config/config.exs b/config/config.exs index 0d07fc692..5032b24e6 100644 --- a/config/config.exs +++ b/config/config.exs @@ -330,7 +330,10 @@ config :pleroma, :mrf_subchain, match_actor: %{} -config :pleroma, :rich_media, enabled: true +config :pleroma, :rich_media, + enabled: true, + ignore_hosts: [], + ignore_tld: ["local", "localdomain", "lan"] config :pleroma, :media_proxy, enabled: false, diff --git a/config/test.exs b/config/test.exs index 73a8b82a1..9d441a7f5 100644 --- a/config/test.exs +++ b/config/test.exs @@ -43,7 +43,11 @@ config :pbkdf2_elixir, rounds: 1 config :tesla, adapter: Tesla.Mock -config :pleroma, :rich_media, enabled: false + +config :pleroma, :rich_media, + enabled: false, + ignore_hosts: [], + ignore_tld: ["local", "localdomain", "lan"] config :web_push_encryption, :vapid_details, subject: "mailto:administrator@example.com", diff --git a/docs/config.md b/docs/config.md index b08c37e84..8c98f5c05 100644 --- a/docs/config.md +++ b/docs/config.md @@ -417,6 +417,8 @@ This config contains two queues: `federator_incoming` and `federator_outgoing`. ## :rich_media * `enabled`: if enabled the instance will parse metadata from attached links to generate link previews +* `ignore_hosts`: list host which will ignore for parse metadata. default is []. +* `ignore_tld`: list TLDs (top-level domains) which will ignore for parse metadata. default is ["local", "localdomain", "lan"] ## :fetch_initial_posts * `enabled`: if enabled, when a new user is federated with, fetch some of their latest posts diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex index 473ff800f..4ece3e846 100644 --- a/lib/pleroma/web/rich_media/helpers.ex +++ b/lib/pleroma/web/rich_media/helpers.ex @@ -4,35 +4,53 @@ defmodule Pleroma.Web.RichMedia.Helpers do alias Pleroma.Activity + alias Pleroma.Config alias Pleroma.HTML alias Pleroma.Object alias Pleroma.Web.RichMedia.Parser - @private_ip_regexp ~r/(127\.)|(10\.\d+\.\d+.\d+)|(192\.168\.) - |(^172\.1[6-9]\.)|(^172\.2[0-9]\.)|(^172\.3[0-1]\.)|(localhost)/ + @validate_tld Application.get_env(:auto_linker, :opts)[:validate_tld] + @spec validate_page_url(any()) :: :ok | :error defp validate_page_url(page_url) when is_binary(page_url) do - validate_tld = Application.get_env(:auto_linker, :opts)[:validate_tld] + page_url + |> AutoLinker.Parser.url?(scheme: true, validate_tld: @validate_tld) + |> parse_uri(page_url) + end + defp validate_page_url(%URI{host: host, scheme: scheme, authority: authority}) + when scheme == "https" and not is_nil(authority) do cond do - Regex.match?(@private_ip_regexp, page_url) -> + host in Config.get([:rich_media, :ignore_hosts], []) -> :error - AutoLinker.Parser.url?(page_url, scheme: true, validate_tld: validate_tld) -> - URI.parse(page_url) |> validate_page_url + get_tld(host) in Config.get([:rich_media, :ignore_tld], []) -> + :error true -> - :error + :ok end end - defp validate_page_url(%URI{authority: nil}), do: :error - defp validate_page_url(%URI{scheme: nil}), do: :error - defp validate_page_url(%URI{}), do: :ok defp validate_page_url(_), do: :error + defp parse_uri(true, url) do + url + |> URI.parse() + |> validate_page_url + end + + defp parse_uri(_, _), do: :error + + defp get_tld(host) do + host + |> String.split(".") + |> Enum.reverse() + |> hd + end + def fetch_data_for_activity(%Activity{data: %{"type" => "Create"}} = activity) do - with true <- Pleroma.Config.get([:rich_media, :enabled]), + with true <- Config.get([:rich_media, :enabled]), %Object{} = object <- Object.normalize(activity), false <- object.data["sensitive"] || false, {:ok, page_url} <- HTML.extract_first_external_url(object, object.data["content"]), diff --git a/test/support/http_request_mock.ex b/test/support/http_request_mock.ex index f7f55a11a..30169edb0 100644 --- a/test/support/http_request_mock.ex +++ b/test/support/http_request_mock.ex @@ -757,6 +757,14 @@ def get("http://example.com/ogp", _, _, _) do {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}} end + def get("https://example.com/ogp", _, _, _) do + {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}} + end + + def get("https://pleroma.local/notice/9kCP7V", _, _, _) do + {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}} + end + def get("http://example.com/ogp-missing-data", _, _, _) do {:ok, %Tesla.Env{ @@ -765,6 +773,14 @@ def get("http://example.com/ogp-missing-data", _, _, _) do }} end + def get("https://example.com/ogp-missing-data", _, _, _) do + {:ok, + %Tesla.Env{ + status: 200, + body: File.read!("test/fixtures/rich_media/ogp-missing-data.html") + }} + end + def get("http://example.com/malformed", _, _, _) do {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/malformed-data.html")}} diff --git a/test/web/mastodon_api/mastodon_api_controller_test.exs b/test/web/mastodon_api/mastodon_api_controller_test.exs index 707723421..17e723528 100644 --- a/test/web/mastodon_api/mastodon_api_controller_test.exs +++ b/test/web/mastodon_api/mastodon_api_controller_test.exs @@ -312,7 +312,7 @@ test "posting a status with OGP link preview", %{conn: conn} do conn |> assign(:user, user) |> post("/api/v1/statuses", %{ - "status" => "http://example.com/ogp" + "status" => "https://example.com/ogp" }) assert %{"id" => id, "card" => %{"title" => "The Rock"}} = json_response(conn, 200) @@ -2557,7 +2557,7 @@ test "max pinned statuses", %{conn: conn, user: user, activity: activity_one} do end test "returns rich-media card", %{conn: conn, user: user} do - {:ok, activity} = CommonAPI.post(user, %{"status" => "http://example.com/ogp"}) + {:ok, activity} = CommonAPI.post(user, %{"status" => "https://example.com/ogp"}) card_data = %{ "image" => "http://ia.media-imdb.com/images/rock.jpg", @@ -2589,7 +2589,7 @@ test "returns rich-media card", %{conn: conn, user: user} do # works with private posts {:ok, activity} = - CommonAPI.post(user, %{"status" => "http://example.com/ogp", "visibility" => "direct"}) + CommonAPI.post(user, %{"status" => "https://example.com/ogp", "visibility" => "direct"}) response_two = conn @@ -2601,7 +2601,8 @@ test "returns rich-media card", %{conn: conn, user: user} do end test "replaces missing description with an empty string", %{conn: conn, user: user} do - {:ok, activity} = CommonAPI.post(user, %{"status" => "http://example.com/ogp-missing-data"}) + {:ok, activity} = + CommonAPI.post(user, %{"status" => "https://example.com/ogp-missing-data"}) response = conn diff --git a/test/web/rich_media/helpers_test.exs b/test/web/rich_media/helpers_test.exs index 1823d9af5..c8f442b05 100644 --- a/test/web/rich_media/helpers_test.exs +++ b/test/web/rich_media/helpers_test.exs @@ -50,13 +50,13 @@ test "crawls valid, complete URLs" do {:ok, activity} = CommonAPI.post(user, %{ - "status" => "[test](http://example.com/ogp)", + "status" => "[test](https://example.com/ogp)", "content_type" => "text/markdown" }) Config.put([:rich_media, :enabled], true) - assert %{page_url: "http://example.com/ogp", rich_media: _} = + assert %{page_url: "https://example.com/ogp", rich_media: _} = Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end @@ -97,19 +97,21 @@ test "refuses to crawl URLs from posts tagged NSFW" do test "refuses to crawl URLs of private network from posts" do user = insert(:user) - Config.put([:rich_media, :enabled], true) {:ok, activity} = CommonAPI.post(user, %{"status" => "http://127.0.0.1:4000/notice/9kCP7VNyPJXFOXDrgO"}) {:ok, activity2} = CommonAPI.post(user, %{"status" => "https://10.111.10.1/notice/9kCP7V"}) - {:ok, activity3} = CommonAPI.post(user, %{"status" => "https://172.16.32.40/notice/9kCP7V"}) {:ok, activity4} = CommonAPI.post(user, %{"status" => "https://192.168.10.40/notice/9kCP7V"}) + {:ok, activity5} = CommonAPI.post(user, %{"status" => "https://pleroma.local/notice/9kCP7V"}) + + Config.put([:rich_media, :enabled], true) assert %{} = Helpers.fetch_data_for_activity(activity) assert %{} = Helpers.fetch_data_for_activity(activity2) assert %{} = Helpers.fetch_data_for_activity(activity3) assert %{} = Helpers.fetch_data_for_activity(activity4) + assert %{} = Helpers.fetch_data_for_activity(activity5) end end From ec01b7c934560e5d60d29c8278e99b004b88ac61 Mon Sep 17 00:00:00 2001 From: Maksim Date: Wed, 26 Jun 2019 03:23:26 +0000 Subject: [PATCH 4/8] Apply suggestion to docs/config.md --- docs/config.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/config.md b/docs/config.md index 8c98f5c05..3490d943c 100644 --- a/docs/config.md +++ b/docs/config.md @@ -417,7 +417,7 @@ This config contains two queues: `federator_incoming` and `federator_outgoing`. ## :rich_media * `enabled`: if enabled the instance will parse metadata from attached links to generate link previews -* `ignore_hosts`: list host which will ignore for parse metadata. default is []. +* `ignore_hosts`: list of hosts which will be ignored by the metadata parser. For example `["accounts.google.com", "xss.website"]`, defaults to `[]`. * `ignore_tld`: list TLDs (top-level domains) which will ignore for parse metadata. default is ["local", "localdomain", "lan"] ## :fetch_initial_posts From 5c0f646cef37e1abc02f5c8a64205d81b2d4d4c4 Mon Sep 17 00:00:00 2001 From: Maksim Pechnikov Date: Wed, 26 Jun 2019 06:24:12 +0300 Subject: [PATCH 5/8] fix validate_page_url --- lib/pleroma/web/rich_media/helpers.ex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex index 4ece3e846..6506de46c 100644 --- a/lib/pleroma/web/rich_media/helpers.ex +++ b/lib/pleroma/web/rich_media/helpers.ex @@ -9,12 +9,12 @@ defmodule Pleroma.Web.RichMedia.Helpers do alias Pleroma.Object alias Pleroma.Web.RichMedia.Parser - @validate_tld Application.get_env(:auto_linker, :opts)[:validate_tld] - @spec validate_page_url(any()) :: :ok | :error defp validate_page_url(page_url) when is_binary(page_url) do + validate_tld = Application.get_env(:auto_linker, :opts)[:validate_tld] + page_url - |> AutoLinker.Parser.url?(scheme: true, validate_tld: @validate_tld) + |> AutoLinker.Parser.url?(scheme: true, validate_tld: validate_tld) |> parse_uri(page_url) end From 7d6a543eca8cbb295a40da6c70259a442d8d1ad6 Mon Sep 17 00:00:00 2001 From: Maksim Pechnikov Date: Wed, 26 Jun 2019 06:39:50 +0300 Subject: [PATCH 6/8] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d3c554245..1708a2700 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -102,6 +102,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Posts which are marked sensitive or tagged nsfw no longer have link previews. - HTTP connection timeout is now set to 10 seconds. - Respond with a 404 Not implemented JSON error message when requested API is not implemented +- Rich Media: Added `ignore_hosts` and `ignore_tld` config params, that allow to set host and top level domain to ignore for crawl URLs from posts. +- Rich Media: crawls only https URLs. ### Fixed - Follow requests don't get 'stuck' anymore. From f1817fe94bd0de3e6e6b08fc1a12113c45020d28 Mon Sep 17 00:00:00 2001 From: Maksim Date: Wed, 26 Jun 2019 03:43:53 +0000 Subject: [PATCH 7/8] Apply suggestion to CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1708a2700..e5857caa7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -103,7 +103,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - HTTP connection timeout is now set to 10 seconds. - Respond with a 404 Not implemented JSON error message when requested API is not implemented - Rich Media: Added `ignore_hosts` and `ignore_tld` config params, that allow to set host and top level domain to ignore for crawl URLs from posts. -- Rich Media: crawls only https URLs. +- Rich Media: crawl only https URLs. ### Fixed - Follow requests don't get 'stuck' anymore. From 437fd6046d6e6f19a5b9cd2e8178ec872a94af5d Mon Sep 17 00:00:00 2001 From: Maksim Pechnikov Date: Wed, 26 Jun 2019 06:46:06 +0300 Subject: [PATCH 8/8] update changelog --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e5857caa7..2884825e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## [unreleased] ### Security - Mastodon API: Fix display names not being sanitized +- Rich media: Do not crawl private IP ranges + ### Added - Add a generic settings store for frontends / clients to use. - Explicit addressing option for posting. @@ -63,6 +65,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Configuration: `skip_thread_containment` option - Configuration: `rate_limit` option. See `Pleroma.Plugs.RateLimiter` documentation for details. - MRF: Support for filtering out likely spam messages by rejecting posts from new users that contain links. +- Configuration: `ignore_hosts` option +- Configuration: `ignore_tld` option ### Changed - **Breaking:** bind to 127.0.0.1 instead of 0.0.0.0 by default @@ -102,7 +106,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Posts which are marked sensitive or tagged nsfw no longer have link previews. - HTTP connection timeout is now set to 10 seconds. - Respond with a 404 Not implemented JSON error message when requested API is not implemented -- Rich Media: Added `ignore_hosts` and `ignore_tld` config params, that allow to set host and top level domain to ignore for crawl URLs from posts. - Rich Media: crawl only https URLs. ### Fixed