Rich Media: Do not cache URLs for preview statuses

Closes #1987
This commit is contained in:
rinpatch 2020-09-05 12:37:27 +03:00
parent 2cd2a8fda3
commit e198ba492e
5 changed files with 65 additions and 22 deletions

View file

@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- Mastodon API: Search parameter `following` now correctly returns the followings rather than the followers
- Mastodon API: Timelines hanging for (`number of posts with links * rich media timeout`) in the worst case.
Reduced to just rich media timeout.
- Mastodon API: Cards being wrong for preview statuses due to cache key collision
- Password resets no longer processed for deactivated accounts
## [2.1.0] - 2020-08-28

View file

@ -100,21 +100,27 @@ defp generate_scrubber_signature(scrubbers) do
end)
end
def extract_first_external_url(_, nil), do: {:error, "No content"}
def extract_first_external_url_from_object(%{data: %{"content" => content}} = object)
when is_binary(content) do
unless object.data["fake"] do
key = "URL|#{object.id}"
def extract_first_external_url(object, content) do
key = "URL|#{object.id}"
Cachex.fetch!(:scrubber_cache, key, fn _key ->
{:commit, {:ok, extract_first_external_url(content)}}
end)
else
{:ok, extract_first_external_url(content)}
end
end
Cachex.fetch!(:scrubber_cache, key, fn _key ->
result =
content
|> Floki.parse_fragment!()
|> Floki.find("a:not(.mention,.hashtag,.attachment,[rel~=\"tag\"])")
|> Enum.take(1)
|> Floki.attribute("href")
|> Enum.at(0)
def extract_first_external_url_from_object(_), do: {:error, :no_content}
{:commit, {:ok, result}}
end)
def extract_first_external_url(content) do
content
|> Floki.parse_fragment!()
|> Floki.find("a:not(.mention,.hashtag,.attachment,[rel~=\"tag\"])")
|> Enum.take(1)
|> Floki.attribute("href")
|> Enum.at(0)
end
end

View file

@ -58,7 +58,7 @@ def fetch_data_for_object(object) do
with true <- Config.get([:rich_media, :enabled]),
false <- object.data["sensitive"] || false,
{:ok, page_url} <-
HTML.extract_first_external_url(object, object.data["content"]),
HTML.extract_first_external_url_from_object(object),
:ok <- validate_page_url(page_url),
{:ok, rich_media} <- Parser.parse(page_url) do
%{page_url: page_url, rich_media: rich_media}

View file

@ -165,7 +165,7 @@ test "filters invalid microformats markup" do
end
end
describe "extract_first_external_url" do
describe "extract_first_external_url_from_object" do
test "extracts the url" do
user = insert(:user)
@ -176,7 +176,7 @@ test "extracts the url" do
})
object = Object.normalize(activity)
{:ok, url} = HTML.extract_first_external_url(object, object.data["content"])
{:ok, url} = HTML.extract_first_external_url_from_object(object)
assert url == "https://github.com/komeiji-satori/Dress"
end
@ -191,7 +191,7 @@ test "skips mentions" do
})
object = Object.normalize(activity)
{:ok, url} = HTML.extract_first_external_url(object, object.data["content"])
{:ok, url} = HTML.extract_first_external_url_from_object(object)
assert url == "https://github.com/syuilo/misskey/blob/develop/docs/setup.en.md"
@ -207,7 +207,7 @@ test "skips hashtags" do
})
object = Object.normalize(activity)
{:ok, url} = HTML.extract_first_external_url(object, object.data["content"])
{:ok, url} = HTML.extract_first_external_url_from_object(object)
assert url == "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140"
end
@ -223,7 +223,7 @@ test "skips microformats hashtags" do
})
object = Object.normalize(activity)
{:ok, url} = HTML.extract_first_external_url(object, object.data["content"])
{:ok, url} = HTML.extract_first_external_url_from_object(object)
assert url == "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140"
end
@ -235,7 +235,7 @@ test "does not crash when there is an HTML entity in a link" do
object = Object.normalize(activity)
assert {:ok, nil} = HTML.extract_first_external_url(object, object.data["content"])
assert {:ok, nil} = HTML.extract_first_external_url_from_object(object)
end
test "skips attachment links" do
@ -249,7 +249,7 @@ test "skips attachment links" do
object = Object.normalize(activity)
assert {:ok, nil} = HTML.extract_first_external_url(object, object.data["content"])
assert {:ok, nil} = HTML.extract_first_external_url_from_object(object)
end
end
end

View file

@ -296,9 +296,45 @@ test "posting a fake status", %{conn: conn} do
assert real_status == fake_status
end
test "fake statuses' preview card is not cached", %{conn: conn} do
clear_config([:rich_media, :enabled], true)
Tesla.Mock.mock(fn
%{
method: :get,
url: "https://example.com/twitter-card"
} ->
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")}
env ->
apply(HttpRequestMock, :request, [env])
end)
conn1 =
conn
|> put_req_header("content-type", "application/json")
|> post("/api/v1/statuses", %{
"status" => "https://example.com/ogp",
"preview" => true
})
conn2 =
conn
|> put_req_header("content-type", "application/json")
|> post("/api/v1/statuses", %{
"status" => "https://example.com/twitter-card",
"preview" => true
})
assert %{"card" => %{"title" => "The Rock"}} = json_response_and_validate_schema(conn1, 200)
assert %{"card" => %{"title" => "Small Island Developing States Photo Submission"}} =
json_response_and_validate_schema(conn2, 200)
end
test "posting a status with OGP link preview", %{conn: conn} do
Tesla.Mock.mock(fn env -> apply(HttpRequestMock, :request, [env]) end)
Config.put([:rich_media, :enabled], true)
clear_config([:rich_media, :enabled], true)
conn =
conn