From e198ba492e5cb1b6ff81775db08298bfcdf1454a Mon Sep 17 00:00:00 2001 From: rinpatch Date: Sat, 5 Sep 2020 12:37:27 +0300 Subject: [PATCH] Rich Media: Do not cache URLs for preview statuses Closes #1987 --- CHANGELOG.md | 1 + lib/pleroma/html.ex | 32 +++++++++------- lib/pleroma/web/rich_media/helpers.ex | 2 +- test/html_test.exs | 14 +++---- .../controllers/status_controller_test.exs | 38 ++++++++++++++++++- 5 files changed, 65 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cdd5b94a8..4662045ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Mastodon API: Search parameter `following` now correctly returns the followings rather than the followers - Mastodon API: Timelines hanging for (`number of posts with links * rich media timeout`) in the worst case. Reduced to just rich media timeout. +- Mastodon API: Cards being wrong for preview statuses due to cache key collision - Password resets no longer processed for deactivated accounts ## [2.1.0] - 2020-08-28 diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex index 20b02f091..43e9145be 100644 --- a/lib/pleroma/html.ex +++ b/lib/pleroma/html.ex @@ -100,21 +100,27 @@ defp generate_scrubber_signature(scrubbers) do end) end - def extract_first_external_url(_, nil), do: {:error, "No content"} + def extract_first_external_url_from_object(%{data: %{"content" => content}} = object) + when is_binary(content) do + unless object.data["fake"] do + key = "URL|#{object.id}" - def extract_first_external_url(object, content) do - key = "URL|#{object.id}" + Cachex.fetch!(:scrubber_cache, key, fn _key -> + {:commit, {:ok, extract_first_external_url(content)}} + end) + else + {:ok, extract_first_external_url(content)} + end + end - Cachex.fetch!(:scrubber_cache, key, fn _key -> - result = - content - |> Floki.parse_fragment!() - |> Floki.find("a:not(.mention,.hashtag,.attachment,[rel~=\"tag\"])") - |> Enum.take(1) - |> Floki.attribute("href") - |> Enum.at(0) + def extract_first_external_url_from_object(_), do: {:error, :no_content} - {:commit, {:ok, result}} - end) + def extract_first_external_url(content) do + content + |> Floki.parse_fragment!() + |> Floki.find("a:not(.mention,.hashtag,.attachment,[rel~=\"tag\"])") + |> Enum.take(1) + |> Floki.attribute("href") + |> Enum.at(0) end end diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex index 2fb482b51..752ca9f81 100644 --- a/lib/pleroma/web/rich_media/helpers.ex +++ b/lib/pleroma/web/rich_media/helpers.ex @@ -58,7 +58,7 @@ def fetch_data_for_object(object) do with true <- Config.get([:rich_media, :enabled]), false <- object.data["sensitive"] || false, {:ok, page_url} <- - HTML.extract_first_external_url(object, object.data["content"]), + HTML.extract_first_external_url_from_object(object), :ok <- validate_page_url(page_url), {:ok, rich_media} <- Parser.parse(page_url) do %{page_url: page_url, rich_media: rich_media} diff --git a/test/html_test.exs b/test/html_test.exs index f8907c8b4..7d3756884 100644 --- a/test/html_test.exs +++ b/test/html_test.exs @@ -165,7 +165,7 @@ test "filters invalid microformats markup" do end end - describe "extract_first_external_url" do + describe "extract_first_external_url_from_object" do test "extracts the url" do user = insert(:user) @@ -176,7 +176,7 @@ test "extracts the url" do }) object = Object.normalize(activity) - {:ok, url} = HTML.extract_first_external_url(object, object.data["content"]) + {:ok, url} = HTML.extract_first_external_url_from_object(object) assert url == "https://github.com/komeiji-satori/Dress" end @@ -191,7 +191,7 @@ test "skips mentions" do }) object = Object.normalize(activity) - {:ok, url} = HTML.extract_first_external_url(object, object.data["content"]) + {:ok, url} = HTML.extract_first_external_url_from_object(object) assert url == "https://github.com/syuilo/misskey/blob/develop/docs/setup.en.md" @@ -207,7 +207,7 @@ test "skips hashtags" do }) object = Object.normalize(activity) - {:ok, url} = HTML.extract_first_external_url(object, object.data["content"]) + {:ok, url} = HTML.extract_first_external_url_from_object(object) assert url == "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140" end @@ -223,7 +223,7 @@ test "skips microformats hashtags" do }) object = Object.normalize(activity) - {:ok, url} = HTML.extract_first_external_url(object, object.data["content"]) + {:ok, url} = HTML.extract_first_external_url_from_object(object) assert url == "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140" end @@ -235,7 +235,7 @@ test "does not crash when there is an HTML entity in a link" do object = Object.normalize(activity) - assert {:ok, nil} = HTML.extract_first_external_url(object, object.data["content"]) + assert {:ok, nil} = HTML.extract_first_external_url_from_object(object) end test "skips attachment links" do @@ -249,7 +249,7 @@ test "skips attachment links" do object = Object.normalize(activity) - assert {:ok, nil} = HTML.extract_first_external_url(object, object.data["content"]) + assert {:ok, nil} = HTML.extract_first_external_url_from_object(object) end end end diff --git a/test/web/mastodon_api/controllers/status_controller_test.exs b/test/web/mastodon_api/controllers/status_controller_test.exs index 5955d8334..f221884e7 100644 --- a/test/web/mastodon_api/controllers/status_controller_test.exs +++ b/test/web/mastodon_api/controllers/status_controller_test.exs @@ -296,9 +296,45 @@ test "posting a fake status", %{conn: conn} do assert real_status == fake_status end + test "fake statuses' preview card is not cached", %{conn: conn} do + clear_config([:rich_media, :enabled], true) + + Tesla.Mock.mock(fn + %{ + method: :get, + url: "https://example.com/twitter-card" + } -> + %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")} + + env -> + apply(HttpRequestMock, :request, [env]) + end) + + conn1 = + conn + |> put_req_header("content-type", "application/json") + |> post("/api/v1/statuses", %{ + "status" => "https://example.com/ogp", + "preview" => true + }) + + conn2 = + conn + |> put_req_header("content-type", "application/json") + |> post("/api/v1/statuses", %{ + "status" => "https://example.com/twitter-card", + "preview" => true + }) + + assert %{"card" => %{"title" => "The Rock"}} = json_response_and_validate_schema(conn1, 200) + + assert %{"card" => %{"title" => "Small Island Developing States Photo Submission"}} = + json_response_and_validate_schema(conn2, 200) + end + test "posting a status with OGP link preview", %{conn: conn} do Tesla.Mock.mock(fn env -> apply(HttpRequestMock, :request, [env]) end) - Config.put([:rich_media, :enabled], true) + clear_config([:rich_media, :enabled], true) conn = conn