diff --git a/CHANGELOG.md b/CHANGELOG.md
index b69cd6734..569c6b616 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## Fixed
- Meilisearch: order of results returned from our REST API now actually matches how Meilisearch ranks results
+## Changed
+- Refactored Rich Media to cache the content in the database. Fetching operations that could block status rendering have been eliminated.
+
## 2024.04.1 (Security)
## Fixed
diff --git a/changelog.d/card-endpoint.remove b/changelog.d/card-endpoint.remove
deleted file mode 100644
index e09a24cf7..000000000
--- a/changelog.d/card-endpoint.remove
+++ /dev/null
@@ -1 +0,0 @@
-Mastodon API: Remove deprecated GET /api/v1/statuses/:id/card endpoint https://github.com/mastodon/mastodon/pull/11213
diff --git a/changelog.d/rich_media_refactor.change b/changelog.d/rich_media_refactor.change
deleted file mode 100644
index c0d4e3b0a..000000000
--- a/changelog.d/rich_media_refactor.change
+++ /dev/null
@@ -1 +0,0 @@
-Refactored Rich Media to cache the content in the database. Fetching operations that could block status rendering have been eliminated.
diff --git a/lib/pleroma/activity/html.ex b/lib/pleroma/activity/html.ex
index e4aaad523..92840b1c0 100644
--- a/lib/pleroma/activity/html.ex
+++ b/lib/pleroma/activity/html.ex
@@ -28,7 +28,7 @@ defp get_cache_keys_for(activity_id) do
end
end
- defp add_cache_key_for(activity_id, additional_key) do
+ def add_cache_key_for(activity_id, additional_key) do
current = get_cache_keys_for(activity_id)
unless additional_key in current do
diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex
index 4972fb26c..01a5f8585 100644
--- a/lib/pleroma/html.ex
+++ b/lib/pleroma/html.ex
@@ -6,8 +6,6 @@ defmodule Pleroma.HTML do
# Scrubbers are compiled on boot so they can be configured in OTP releases
# @on_load :compile_scrubbers
- @cachex Pleroma.Config.get([:cachex, :provider], Cachex)
-
def compile_scrubbers do
dir = Path.join(:code.priv_dir(:pleroma), "scrubbers")
diff --git a/lib/pleroma/web/mastodon_api/views/status_view.ex b/lib/pleroma/web/mastodon_api/views/status_view.ex
index 982c5b137..1055fb4bc 100644
--- a/lib/pleroma/web/mastodon_api/views/status_view.ex
+++ b/lib/pleroma/web/mastodon_api/views/status_view.ex
@@ -294,18 +294,6 @@ def render("show.json", %{activity: %{id: id, data: %{"object" => _object}} = ac
# Here the implicit index of the current content is 0
chrono_order = history_len - 1
- content =
- object
- |> render_content()
-
- quote_post =
- if visible_for_user?(quote_activity, opts[:for]) and opts[:show_quote] != false do
- quote_rendering_opts = Map.merge(opts, %{activity: quote_activity, show_quote: false})
- render("show.json", quote_rendering_opts)
- else
- nil
- end
-
content =
object
|> render_content()
@@ -318,64 +306,12 @@ def render("show.json", %{activity: %{id: id, data: %{"object" => _object}} = ac
"mastoapi:content:#{chrono_order}"
)
- content_plaintext =
- content
- |> Activity.HTML.get_cached_stripped_html_for_activity(
- activity,
- "mastoapi:content:#{chrono_order}"
- )
-
- summary = object.data["summary"] || ""
-
card =
case Card.get_by_activity(activity) do
%Card{} = result -> render("card.json", result)
_ -> nil
end
- url =
- if user.local do
- Pleroma.Web.Router.Helpers.o_status_url(Pleroma.Web.Endpoint, :notice, activity)
- else
- object.data["url"] || object.data["external_url"] || object.data["id"]
- end
-
- direct_conversation_id =
- with {_, nil} <- {:direct_conversation_id, opts[:direct_conversation_id]},
- {_, true} <- {:include_id, opts[:with_direct_conversation_id]},
- {_, %User{} = for_user} <- {:for_user, opts[:for]} do
- Activity.direct_conversation_id(activity, for_user)
- else
- {:direct_conversation_id, participation_id} when is_integer(participation_id) ->
- participation_id
-
- _e ->
- nil
- end
-
- emoji_reactions =
- object
- |> Object.get_emoji_reactions()
- |> EmojiReactionController.filter_allowed_users(
- opts[:for],
- Map.get(opts, :with_muted, false)
- )
- |> Stream.map(fn {emoji, users, url} ->
- build_emoji_map(emoji, users, url, opts[:for])
- end)
- |> Enum.to_list()
-
- # Status muted state (would do 1 request per status unless user mutes are preloaded)
- muted =
- thread_muted? ||
- UserRelationship.exists?(
- get_in(opts, [:relationships, :user_relationships]),
- :mute,
- opts[:for],
- user,
- fn for_user, user -> User.mutes?(for_user, user) end
- )
-
content_plaintext =
content
|> Activity.HTML.get_cached_stripped_html_for_activity(
@@ -385,8 +321,6 @@ def render("show.json", %{activity: %{id: id, data: %{"object" => _object}} = ac
summary = object.data["summary"] || ""
- card = render("card.json", Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity))
-
url =
if user.local do
url(~p[/notice/#{activity}])
diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex
index 0fb09f5a8..7f6b5d388 100644
--- a/lib/pleroma/web/rich_media/parser.ex
+++ b/lib/pleroma/web/rich_media/parser.ex
@@ -1,5 +1,5 @@
# Pleroma: A lightweight social networking server
-# Copyright © 2017-2021 Pleroma Authors
+# Copyright © 2017-2022 Pleroma Authors
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parser do
@@ -15,10 +15,14 @@ def parse(nil), do: nil
@spec parse(String.t()) :: {:ok, map()} | {:error, any()}
def parse(url) do
- with :ok <- validate_page_url(url),
+ with {_, true} <- {:config, @config_impl.get([:rich_media, :enabled])},
+ :ok <- validate_page_url(url),
{:ok, data} <- parse_url(url) do
data = Map.put(data, "url", url)
{:ok, data}
+ else
+ {:config, _} -> {:error, :rich_media_disabled}
+ e -> e
end
end
@@ -32,21 +36,6 @@ defp parse_url(url) do
end
end
- def parse_with_timeout(url) do
- try do
- task =
- Task.Supervisor.async_nolink(Pleroma.TaskSupervisor, fn ->
- parse_url(url)
- end)
-
- Task.await(task, 5000)
- catch
- :exit, {:timeout, _} ->
- Logger.warning("Timeout while fetching rich media for #{url}")
- {:error, :timeout}
- end
- end
-
defp maybe_parse(html) do
Enum.reduce_while(parsers(), %{}, fn parser, acc ->
case parser.parse(html, acc) do
@@ -72,4 +61,46 @@ defp clean_parsed_data(data) do
end)
|> Map.new()
end
+
+ @spec validate_page_url(URI.t() | binary()) :: :ok | :error
+ defp validate_page_url(page_url) when is_binary(page_url) do
+ validate_tld = @config_impl.get([Pleroma.Formatter, :validate_tld])
+
+ page_url
+ |> Linkify.Parser.url?(validate_tld: validate_tld)
+ |> parse_uri(page_url)
+ end
+
+ defp validate_page_url(%URI{host: host, scheme: "https"}) do
+ cond do
+ Linkify.Parser.ip?(host) ->
+ :error
+
+ host in @config_impl.get([:rich_media, :ignore_hosts], []) ->
+ :error
+
+ get_tld(host) in @config_impl.get([:rich_media, :ignore_tld], []) ->
+ :error
+
+ true ->
+ :ok
+ end
+ end
+
+ defp validate_page_url(_), do: :error
+
+ defp parse_uri(true, url) do
+ url
+ |> URI.parse()
+ |> validate_page_url
+ end
+
+ defp parse_uri(_, _), do: :error
+
+ defp get_tld(host) do
+ host
+ |> String.split(".")
+ |> Enum.reverse()
+ |> hd
+ end
end
diff --git a/lib/pleroma/web/rich_media/parsers/o_embed.ex b/lib/pleroma/web/rich_media/parsers/o_embed.ex
index 695740d2e..0f303176c 100644
--- a/lib/pleroma/web/rich_media/parsers/o_embed.ex
+++ b/lib/pleroma/web/rich_media/parsers/o_embed.ex
@@ -1,5 +1,5 @@
# Pleroma: A lightweight social networking server
-# Copyright © 2017-2021 Pleroma Authors
+# Copyright © 2017-2022 Pleroma Authors
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
diff --git a/mix.exs b/mix.exs
index 1aec1a21e..a789cd967 100644
--- a/mix.exs
+++ b/mix.exs
@@ -157,7 +157,7 @@ defp deps do
{:floki, "~> 0.34"},
{:timex, "~> 3.7"},
{:ueberauth, "== 0.10.5"},
- {:linkify, git: "https://akkoma.dev/AkkomaGang/linkify.git"},
+ {:linkify, "~> 0.5.3"},
{:http_signatures,
git: "https://akkoma.dev/AkkomaGang/http_signatures.git",
ref: "6640ce7d24c783ac2ef56e27d00d12e8dc85f396"},
diff --git a/mix.lock b/mix.lock
index 04dc170ba..ab24b8dbc 100644
--- a/mix.lock
+++ b/mix.lock
@@ -65,7 +65,7 @@
"joken": {:hex, :joken, "2.6.1", "2ca3d8d7f83bf7196296a3d9b2ecda421a404634bfc618159981a960020480a1", [:mix], [{:jose, "~> 1.11.9", [hex: :jose, repo: "hexpm", optional: false]}], "hexpm", "ab26122c400b3d254ce7d86ed066d6afad27e70416df947cdcb01e13a7382e68"},
"jose": {:hex, :jose, "1.11.10", "a903f5227417bd2a08c8a00a0cbcc458118be84480955e8d251297a425723f83", [:mix, :rebar3], [], "hexpm", "0d6cd36ff8ba174db29148fc112b5842186b68a90ce9fc2b3ec3afe76593e614"},
"jumper": {:hex, :jumper, "1.0.2", "68cdcd84472a00ac596b4e6459a41b3062d4427cbd4f1e8c8793c5b54f1406a7", [:mix], [], "hexpm", "9b7782409021e01ab3c08270e26f36eb62976a38c1aa64b2eaf6348422f165e1"},
- "linkify": {:git, "https://akkoma.dev/AkkomaGang/linkify.git", "2567e2c1073fa371fd26fd66dfa5bc77b6919c16", []},
+ "linkify": {:hex, :linkify, "0.5.3", "5f8143d8f61f5ff08d3aeeff47ef6509492b4948d8f08007fbf66e4d2246a7f2", [:mix], [], "hexpm", "3ef35a1377d47c25506e07c1c005ea9d38d700699d92ee92825f024434258177"},
"mail": {:hex, :mail, "0.3.1", "cb0a14e4ed8904e4e5a08214e686ccf6f9099346885db17d8c309381f865cc5c", [:mix], [], "hexpm", "1db701e89865c1d5fa296b2b57b1cd587587cca8d8a1a22892b35ef5a8e352a6"},
"majic": {:git, "https://akkoma.dev/AkkomaGang/majic.git", "80540b36939ec83f48e76c61e5000e0fd67706f0", [ref: "80540b36939ec83f48e76c61e5000e0fd67706f0"]},
"makeup": {:hex, :makeup, "1.1.2", "9ba8837913bdf757787e71c1581c21f9d2455f4dd04cfca785c70bbfff1a76a3", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "cce1566b81fbcbd21eca8ffe808f33b221f9eee2cbc7a1706fc3da9ff18e6cac"},
diff --git a/test/fixtures/rich_media/google.html b/test/fixtures/rich_media/google.html
new file mode 100644
index 000000000..c068397a5
--- /dev/null
+++ b/test/fixtures/rich_media/google.html
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test/fixtures/rich_media/yahoo.html b/test/fixtures/rich_media/yahoo.html
new file mode 100644
index 000000000..41d8c5cd9
--- /dev/null
+++ b/test/fixtures/rich_media/yahoo.html
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test/mix/tasks/pleroma/database_test.exs b/test/mix/tasks/pleroma/database_test.exs
index f28bef51c..ba7d284d5 100644
--- a/test/mix/tasks/pleroma/database_test.exs
+++ b/test/mix/tasks/pleroma/database_test.exs
@@ -398,6 +398,7 @@ test "We don't have unexpected tables which may contain objects that are referen
["push_subscriptions"],
["registrations"],
["report_notes"],
+ ["rich_media_card"],
["scheduled_activities"],
["schema_migrations"],
["thread_mutes"],
diff --git a/test/pleroma/web/mastodon_api/views/status_view_test.exs b/test/pleroma/web/mastodon_api/views/status_view_test.exs
index ce251a7ba..7db3e3e61 100644
--- a/test/pleroma/web/mastodon_api/views/status_view_test.exs
+++ b/test/pleroma/web/mastodon_api/views/status_view_test.exs
@@ -17,10 +17,12 @@ defmodule Pleroma.Web.MastodonAPI.StatusViewTest do
alias Pleroma.Web.MastodonAPI.AccountView
alias Pleroma.Web.MastodonAPI.StatusView
alias Pleroma.Web.RichMedia.Card
+ alias Pleroma.UnstubbedConfigMock, as: ConfigMock
import Pleroma.Factory
import Tesla.Mock
import OpenApiSpex.TestAssertions
+ import Mox
setup do
mock(fn env -> apply(HttpRequestMock, :request, [env]) end)
diff --git a/test/pleroma/web/rich_media/parser_test.exs b/test/pleroma/web/rich_media/parser_test.exs
index 40d00a170..a5f2563a2 100644
--- a/test/pleroma/web/rich_media/parser_test.exs
+++ b/test/pleroma/web/rich_media/parser_test.exs
@@ -1,96 +1,30 @@
# Pleroma: A lightweight social networking server
-# Copyright © 2017-2021 Pleroma Authors
+# Copyright © 2017-2022 Pleroma Authors
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.ParserTest do
use Pleroma.DataCase
+
alias Pleroma.Web.RichMedia.Parser
+ import Tesla.Mock
+
setup do
- Tesla.Mock.mock_global(fn
- %{
- method: :get,
- url: "http://example.com/ogp"
- } ->
- %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}
-
- %{
- method: :get,
- url: "http://example.com/non-ogp"
- } ->
- %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/non_ogp_embed.html")}
-
- %{
- method: :get,
- url: "http://example.com/ogp-missing-title"
- } ->
- %Tesla.Env{
- status: 200,
- body: File.read!("test/fixtures/rich_media/ogp-missing-title.html")
- }
-
- %{
- method: :get,
- url: "http://example.com/twitter-card"
- } ->
- %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")}
-
- %{
- method: :get,
- url: "http://example.com/oembed"
- } ->
- %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.html")}
-
- %{
- method: :get,
- url: "http://example.com/oembed.json"
- } ->
- %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.json")}
-
- %{method: :get, url: "http://example.com/empty"} ->
- %Tesla.Env{status: 200, body: "hello"}
-
- %{method: :get, url: "http://example.com/malformed"} ->
- %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/malformed-data.html")}
-
- %{method: :get, url: "http://example.com/error"} ->
- {:error, :overload}
-
- %{
- method: :head,
- url: "http://example.com/huge-page"
- } ->
- %Tesla.Env{
- status: 200,
- headers: [{"content-length", "2000001"}, {"content-type", "text/html"}]
- }
-
- %{
- method: :head,
- url: "http://example.com/pdf-file"
- } ->
- %Tesla.Env{
- status: 200,
- headers: [{"content-length", "1000000"}, {"content-type", "application/pdf"}]
- }
-
- %{method: :head} ->
- %Tesla.Env{status: 404, body: "", headers: []}
- end)
-
- :ok
+ mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end)
end
+ setup_all do: clear_config([:rich_media, :enabled], true)
+
test "returns error when no metadata present" do
- assert {:error, _} = Parser.parse("http://example.com/empty")
+ assert {:error, _} = Parser.parse("https://example.com/empty")
end
test "doesn't just add a title" do
- assert {:error, {:invalid_metadata, _}} = Parser.parse("http://example.com/non-ogp")
+ assert {:error, {:invalid_metadata, _}} = Parser.parse("https://example.com/non-ogp")
end
test "parses ogp" do
- assert Parser.parse("http://example.com/ogp") ==
+ assert Parser.parse("https://example.com/ogp") ==
{:ok,
%{
"image" => "http://ia.media-imdb.com/images/rock.jpg",
@@ -98,12 +32,12 @@ test "parses ogp" do
"description" =>
"Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
"type" => "video.movie",
- "url" => "http://example.com/ogp"
+ "url" => "https://example.com/ogp"
}}
end
test "falls back to
when ogp:title is missing" do
- assert Parser.parse("http://example.com/ogp-missing-title") ==
+ assert Parser.parse("https://example.com/ogp-missing-title") ==
{:ok,
%{
"image" => "http://ia.media-imdb.com/images/rock.jpg",
@@ -111,12 +45,12 @@ test "falls back to when ogp:title is missing" do
"description" =>
"Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
"type" => "video.movie",
- "url" => "http://example.com/ogp-missing-title"
+ "url" => "https://example.com/ogp-missing-title"
}}
end
test "parses twitter card" do
- assert Parser.parse("http://example.com/twitter-card") ==
+ assert Parser.parse("https://example.com/twitter-card") ==
{:ok,
%{
"card" => "summary",
@@ -124,12 +58,12 @@ test "parses twitter card" do
"image" => "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg",
"title" => "Small Island Developing States Photo Submission",
"description" => "View the album on Flickr.",
- "url" => "http://example.com/twitter-card"
+ "url" => "https://example.com/twitter-card"
}}
end
test "parses OEmbed and filters HTML tags" do
- assert Parser.parse("http://example.com/oembed") ==
+ assert Parser.parse("https://example.com/oembed") ==
{:ok,
%{
"author_name" => "\u202E\u202D\u202Cbees\u202C",
@@ -149,7 +83,7 @@ test "parses OEmbed and filters HTML tags" do
"thumbnail_width" => 150,
"title" => "Bacon Lollys",
"type" => "photo",
- "url" => "http://example.com/oembed",
+ "url" => "https://example.com/oembed",
"version" => "1.0",
"web_page" => "https://www.flickr.com/photos/bees/2362225867/",
"web_page_short_url" => "https://flic.kr/p/4AK2sc",
@@ -158,19 +92,19 @@ test "parses OEmbed and filters HTML tags" do
end
test "rejects invalid OGP data" do
- assert {:error, _} = Parser.parse("http://example.com/malformed")
+ assert {:error, _} = Parser.parse("https://example.com/malformed")
end
test "returns error if getting page was not successful" do
- assert {:error, :overload} = Parser.parse("http://example.com/error")
+ assert {:error, :overload} = Parser.parse("https://example.com/error")
end
test "does a HEAD request to check if the body is too large" do
- assert {:error, :body_too_large} = Parser.parse("http://example.com/huge-page")
+ assert {:error, :body_too_large} = Parser.parse("https://example.com/huge-page")
end
test "does a HEAD request to check if the body is html" do
- assert {:error, {:content_type, _}} = Parser.parse("http://example.com/pdf-file")
+ assert {:error, {:content_type, _}} = Parser.parse("https://example.com/pdf-file")
end
test "refuses to crawl incomplete URLs" do
@@ -195,4 +129,10 @@ test "refuses to crawl URLs of private network from posts" do
assert :error == Parser.parse(url)
end)
end
+
+ test "returns error when disabled" do
+ clear_config([:rich_media, :enabled], false)
+
+ assert match?({:error, :rich_media_disabled}, Parser.parse("https://example.com/ogp"))
+ end
end
diff --git a/test/support/http_request_mock.ex b/test/support/http_request_mock.ex
index 7bfcfaa0e..6a01393e3 100644
--- a/test/support/http_request_mock.ex
+++ b/test/support/http_request_mock.ex
@@ -1708,14 +1708,41 @@ def post(url, query, body, headers) do
# Most of the rich media mocks are missing HEAD requests, so we just return 404.
@rich_media_mocks [
+ "https://example.com/empty",
+ "https://example.com/error",
+ "https://example.com/malformed",
+ "https://example.com/non-ogp",
+ "https://example.com/oembed",
+ "https://example.com/oembed.json",
"https://example.com/ogp",
"https://example.com/ogp-missing-data",
- "https://example.com/twitter-card"
+ "https://example.com/ogp-missing-title",
+ "https://example.com/twitter-card",
+ "https://google.com/",
+ "https://pleroma.local/notice/9kCP7V",
+ "https://yahoo.com/"
]
+
def head(url, _query, _body, _headers) when url in @rich_media_mocks do
{:ok, %Tesla.Env{status: 404, body: ""}}
end
+ def head("https://example.com/pdf-file", _, _, _) do
+ {:ok,
+ %Tesla.Env{
+ status: 200,
+ headers: [{"content-length", "1000000"}, {"content-type", "application/pdf"}]
+ }}
+ end
+
+ def head("https://example.com/huge-page", _, _, _) do
+ {:ok,
+ %Tesla.Env{
+ status: 200,
+ headers: [{"content-length", "2000001"}, {"content-type", "text/html"}]
+ }}
+ end
+
def head(url, query, body, headers) do
{:error,
"Mock response not implemented for HEAD #{inspect(url)}, #{query}, #{inspect(body)}, #{inspect(headers)}"}
diff --git a/test/support/mocks.ex b/test/support/mocks.ex
index fd8f825b3..cc34b21a5 100644
--- a/test/support/mocks.ex
+++ b/test/support/mocks.ex
@@ -26,5 +26,6 @@
Mox.defmock(Pleroma.Web.FederatorMock, for: Pleroma.Web.Federator.Publishing)
Mox.defmock(Pleroma.ConfigMock, for: Pleroma.Config.Getting)
+Mox.defmock(Pleroma.UnstubbedConfigMock, for: Pleroma.Config.Getting)
Mox.defmock(Pleroma.LoggerMock, for: Pleroma.Logging)
diff --git a/test/test_helper.exs b/test/test_helper.exs
index 22a0f33ee..dafa45099 100644
--- a/test/test_helper.exs
+++ b/test/test_helper.exs
@@ -17,3 +17,16 @@
uploads = Pleroma.Config.get([Pleroma.Uploaders.Local, :uploads], "test/uploads")
File.rm_rf!(uploads)
end)
+
+defmodule Pleroma.Test.StaticConfig do
+ @moduledoc """
+ This module provides a Config that is completely static, built at startup time from the environment. It's safe to use in testing as it will not modify any state.
+ """
+
+ @behaviour Pleroma.Config.Getting
+ @config Application.get_all_env(:pleroma)
+
+ def get(path, default \\ nil) do
+ get_in(@config, path) || default
+ end
+end