forked from AkkomaGang/akkoma
Merge 2024-11 stable #13
16 changed files with 431 additions and 240 deletions
1
changelog.d/rich_media_refactor.change
Normal file
1
changelog.d/rich_media_refactor.change
Normal file
|
@ -0,0 +1 @@
|
|||
Refactored Rich Media to cache the content in the database. Fetching operations that could block status rendering have been eliminated.
|
|
@ -579,7 +579,8 @@
|
|||
mute_expire: 5,
|
||||
search_indexing: 10,
|
||||
nodeinfo_fetcher: 1,
|
||||
database_prune: 1
|
||||
database_prune: 1,
|
||||
rich_media_expiration: 2
|
||||
],
|
||||
plugins: [
|
||||
Oban.Plugins.Pruner,
|
||||
|
|
|
@ -141,6 +141,8 @@
|
|||
config :pleroma, :instances_favicons, enabled: false
|
||||
config :pleroma, :instances_nodeinfo, enabled: false
|
||||
|
||||
config :pleroma, Pleroma.Web.RichMedia.Backfill, provider: Pleroma.Web.RichMedia.Backfill
|
||||
|
||||
if File.exists?("./config/test.secret.exs") do
|
||||
import_config "test.secret.exs"
|
||||
else
|
||||
|
|
|
@ -67,22 +67,9 @@ def ensure_scrubbed_html(
|
|||
end
|
||||
end
|
||||
|
||||
def extract_first_external_url_from_object(%{data: %{"content" => content}} = object)
|
||||
@spec extract_first_external_url_from_object(Pleroma.Object.t()) :: String.t() | nil
|
||||
def extract_first_external_url_from_object(%{data: %{"content" => content}})
|
||||
when is_binary(content) do
|
||||
unless object.data["fake"] do
|
||||
key = "URL|#{object.id}"
|
||||
|
||||
@cachex.fetch!(:scrubber_cache, key, fn _key ->
|
||||
{:commit, {:ok, extract_first_external_url(content)}}
|
||||
end)
|
||||
else
|
||||
{:ok, extract_first_external_url(content)}
|
||||
end
|
||||
end
|
||||
|
||||
def extract_first_external_url_from_object(_), do: {:error, :no_content}
|
||||
|
||||
def extract_first_external_url(content) do
|
||||
content
|
||||
|> Floki.parse_fragment!()
|
||||
|> Floki.find("a:not(.mention,.hashtag,.attachment,[rel~=\"tag\"])")
|
||||
|
@ -90,4 +77,6 @@ def extract_first_external_url(content) do
|
|||
|> Floki.attribute("href")
|
||||
|> Enum.at(0)
|
||||
end
|
||||
|
||||
def extract_first_external_url_from_object(_), do: nil
|
||||
end
|
||||
|
|
|
@ -155,9 +155,7 @@ def insert(map, local \\ true, fake \\ false, bypass_actor_check \\ false) when
|
|||
# Splice in the child object if we have one.
|
||||
activity = Maps.put_if_present(activity, :object, object)
|
||||
|
||||
ConcurrentLimiter.limit(Pleroma.Web.RichMedia.Helpers, fn ->
|
||||
Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end)
|
||||
end)
|
||||
Pleroma.Web.RichMedia.Card.get_by_activity(activity)
|
||||
|
||||
# Add local posts to search index
|
||||
if local, do: Pleroma.Search.add_to_index(activity)
|
||||
|
@ -185,7 +183,7 @@ def insert(map, local \\ true, fake \\ false, bypass_actor_check \\ false) when
|
|||
id: "pleroma:fakeid"
|
||||
}
|
||||
|
||||
Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
|
||||
Pleroma.Web.RichMedia.Card.get_by_activity(activity)
|
||||
{:ok, activity}
|
||||
|
||||
{:remote_limit_pass, _} ->
|
||||
|
|
|
@ -225,9 +225,7 @@ def handle(%{data: %{"type" => "Create"}} = activity, meta) do
|
|||
end
|
||||
end
|
||||
|
||||
ConcurrentLimiter.limit(Pleroma.Web.RichMedia.Helpers, fn ->
|
||||
Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end)
|
||||
end)
|
||||
Pleroma.Web.RichMedia.Card.get_by_activity(activity)
|
||||
|
||||
Pleroma.Search.add_to_index(Map.put(activity, :object, object))
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@ defmodule Pleroma.Web.MastodonAPI.StatusController do
|
|||
alias Pleroma.Web.OAuth.Token
|
||||
alias Pleroma.Web.Plugs.OAuthScopesPlug
|
||||
alias Pleroma.Web.Plugs.RateLimiter
|
||||
alias Pleroma.Web.RichMedia.Card
|
||||
|
||||
plug(Pleroma.Web.ApiSpec.CastAndValidate)
|
||||
|
||||
|
@ -383,6 +384,21 @@ def unmute_conversation(%{assigns: %{user: user}} = conn, %{id: id}) do
|
|||
end
|
||||
end
|
||||
|
||||
@doc "GET /api/v1/statuses/:id/card"
|
||||
@deprecated "https://github.com/tootsuite/mastodon/pull/11213"
|
||||
def card(
|
||||
%{assigns: %{user: user}, private: %{open_api_spex: %{params: %{id: status_id}}}} = conn,
|
||||
_
|
||||
) do
|
||||
with %Activity{} = activity <- Activity.get_by_id(status_id),
|
||||
true <- Visibility.visible_for_user?(activity, user),
|
||||
%Card{} = card_data <- Card.get_by_activity(activity) do
|
||||
render(conn, "card.json", card_data)
|
||||
else
|
||||
_ -> render_error(conn, :not_found, "Record not found")
|
||||
end
|
||||
end
|
||||
|
||||
@doc "GET /api/v1/statuses/:id/favourited_by"
|
||||
def favourited_by(%{assigns: %{user: user}} = conn, %{id: id}) do
|
||||
with true <- Pleroma.Config.get([:instance, :show_reactions]),
|
||||
|
|
|
@ -22,6 +22,7 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
|
|||
alias Pleroma.Web.MediaProxy
|
||||
alias Pleroma.Web.PleromaAPI.EmojiReactionController
|
||||
require Logger
|
||||
alias Pleroma.Web.RichMedia.Card
|
||||
|
||||
import Pleroma.Web.ActivityPub.Visibility, only: [get_visibility: 1, visible_for_user?: 2]
|
||||
|
||||
|
@ -30,9 +31,7 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
|
|||
# pagination is restricted to 40 activities at a time
|
||||
defp fetch_rich_media_for_activities(activities) do
|
||||
Enum.each(activities, fn activity ->
|
||||
spawn(fn ->
|
||||
Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
|
||||
end)
|
||||
spawn(fn -> Card.get_by_activity(activity) end)
|
||||
end)
|
||||
end
|
||||
|
||||
|
@ -93,9 +92,7 @@ def render("index.json", opts) do
|
|||
# To do: check AdminAPIControllerTest on the reasons behind nil activities in the list
|
||||
activities = Enum.filter(opts.activities, & &1)
|
||||
|
||||
# Start fetching rich media before doing anything else, so that later calls to get the cards
|
||||
# only block for timeout in the worst case, as opposed to
|
||||
# length(activities_with_links) * timeout
|
||||
# Start prefetching rich media before doing anything else
|
||||
fetch_rich_media_for_activities(activities)
|
||||
replied_to_activities = get_replied_to_activities(activities)
|
||||
|
||||
|
@ -301,12 +298,82 @@ def render("show.json", %{activity: %{id: id, data: %{"object" => _object}} = ac
|
|||
object
|
||||
|> render_content()
|
||||
|
||||
content_html =
|
||||
content
|
||||
|> Activity.HTML.get_cached_scrubbed_html_for_activity(
|
||||
User.html_filter_policy(opts[:for]),
|
||||
activity,
|
||||
"mastoapi:content:#{chrono_order}"
|
||||
quote_post =
|
||||
if visible_for_user?(quote_activity, opts[:for]) and opts[:show_quote] != false do
|
||||
quote_rendering_opts = Map.merge(opts, %{activity: quote_activity, show_quote: false})
|
||||
render("show.json", quote_rendering_opts)
|
||||
else
|
||||
nil
|
||||
end
|
||||
|
||||
content =
|
||||
object
|
||||
|> render_content()
|
||||
|
||||
content_html =
|
||||
content
|
||||
|> Activity.HTML.get_cached_scrubbed_html_for_activity(
|
||||
User.html_filter_policy(opts[:for]),
|
||||
activity,
|
||||
"mastoapi:content:#{chrono_order}"
|
||||
)
|
||||
|
||||
content_plaintext =
|
||||
content
|
||||
|> Activity.HTML.get_cached_stripped_html_for_activity(
|
||||
activity,
|
||||
"mastoapi:content:#{chrono_order}"
|
||||
)
|
||||
|
||||
summary = object.data["summary"] || ""
|
||||
|
||||
card =
|
||||
case Card.get_by_activity(activity) do
|
||||
%Card{} = result -> render("card.json", result)
|
||||
_ -> nil
|
||||
end
|
||||
|
||||
url =
|
||||
if user.local do
|
||||
Pleroma.Web.Router.Helpers.o_status_url(Pleroma.Web.Endpoint, :notice, activity)
|
||||
else
|
||||
object.data["url"] || object.data["external_url"] || object.data["id"]
|
||||
end
|
||||
|
||||
direct_conversation_id =
|
||||
with {_, nil} <- {:direct_conversation_id, opts[:direct_conversation_id]},
|
||||
{_, true} <- {:include_id, opts[:with_direct_conversation_id]},
|
||||
{_, %User{} = for_user} <- {:for_user, opts[:for]} do
|
||||
Activity.direct_conversation_id(activity, for_user)
|
||||
else
|
||||
{:direct_conversation_id, participation_id} when is_integer(participation_id) ->
|
||||
participation_id
|
||||
|
||||
_e ->
|
||||
nil
|
||||
end
|
||||
|
||||
emoji_reactions =
|
||||
object
|
||||
|> Object.get_emoji_reactions()
|
||||
|> EmojiReactionController.filter_allowed_users(
|
||||
opts[:for],
|
||||
Map.get(opts, :with_muted, false)
|
||||
)
|
||||
|> Stream.map(fn {emoji, users, url} ->
|
||||
build_emoji_map(emoji, users, url, opts[:for])
|
||||
end)
|
||||
|> Enum.to_list()
|
||||
|
||||
# Status muted state (would do 1 request per status unless user mutes are preloaded)
|
||||
muted =
|
||||
thread_muted? ||
|
||||
UserRelationship.exists?(
|
||||
get_in(opts, [:relationships, :user_relationships]),
|
||||
:mute,
|
||||
opts[:for],
|
||||
user,
|
||||
fn for_user, user -> User.mutes?(for_user, user) end
|
||||
)
|
||||
|
||||
content_plaintext =
|
||||
|
@ -528,15 +595,8 @@ def render("source.json", %{activity: %{data: %{"object" => _object}} = activity
|
|||
}
|
||||
end
|
||||
|
||||
def render("card.json", %{rich_media: rich_media, page_url: page_url}) do
|
||||
page_url_data = URI.parse(page_url)
|
||||
|
||||
page_url_data =
|
||||
if is_binary(rich_media["url"]) do
|
||||
URI.merge(page_url_data, URI.parse(rich_media["url"]))
|
||||
else
|
||||
page_url_data
|
||||
end
|
||||
def render("card.json", %Card{fields: rich_media}) do
|
||||
page_url_data = URI.parse(rich_media["url"])
|
||||
|
||||
page_url = page_url_data |> to_string
|
||||
|
||||
|
|
101
lib/pleroma/web/rich_media/backfill.ex
Normal file
101
lib/pleroma/web/rich_media/backfill.ex
Normal file
|
@ -0,0 +1,101 @@
|
|||
# Pleroma: A lightweight social networking server
|
||||
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
|
||||
# SPDX-License-Identifier: AGPL-3.0-only
|
||||
|
||||
defmodule Pleroma.Web.RichMedia.Backfill.Task do
|
||||
alias Pleroma.Web.RichMedia.Backfill
|
||||
|
||||
def run(args) do
|
||||
Task.Supervisor.start_child(Pleroma.TaskSupervisor, Backfill, :run, [args],
|
||||
name: {:global, {:rich_media, args.url_hash}}
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
defmodule Pleroma.Web.RichMedia.Backfill do
|
||||
alias Pleroma.Web.RichMedia.Card
|
||||
alias Pleroma.Web.RichMedia.Parser
|
||||
alias Pleroma.Web.RichMedia.Parser.TTL
|
||||
alias Pleroma.Workers.RichMediaExpirationWorker
|
||||
|
||||
require Logger
|
||||
|
||||
@backfiller Pleroma.Config.get([__MODULE__, :provider], Pleroma.Web.RichMedia.Backfill.Task)
|
||||
@cachex Pleroma.Config.get([:cachex, :provider], Cachex)
|
||||
@max_attempts 3
|
||||
@retry 5_000
|
||||
|
||||
def start(%{url: url} = args) when is_binary(url) do
|
||||
url_hash = Card.url_to_hash(url)
|
||||
|
||||
args =
|
||||
args
|
||||
|> Map.put(:attempt, 1)
|
||||
|> Map.put(:url_hash, url_hash)
|
||||
|
||||
@backfiller.run(args)
|
||||
end
|
||||
|
||||
def run(%{url: url, url_hash: url_hash, attempt: attempt} = args)
|
||||
when attempt <= @max_attempts do
|
||||
case Parser.parse(url) do
|
||||
{:ok, fields} ->
|
||||
{:ok, card} = Card.create(url, fields)
|
||||
|
||||
maybe_schedule_expiration(url, fields)
|
||||
|
||||
if Map.has_key?(args, :activity_id) do
|
||||
stream_update(args)
|
||||
end
|
||||
|
||||
warm_cache(url_hash, card)
|
||||
|
||||
{:error, {:invalid_metadata, fields}} ->
|
||||
Logger.debug("Rich media incomplete or invalid metadata for #{url}: #{inspect(fields)}")
|
||||
negative_cache(url_hash)
|
||||
|
||||
{:error, :body_too_large} ->
|
||||
Logger.error("Rich media error for #{url}: :body_too_large")
|
||||
negative_cache(url_hash)
|
||||
|
||||
{:error, {:content_type, type}} ->
|
||||
Logger.debug("Rich media error for #{url}: :content_type is #{type}")
|
||||
negative_cache(url_hash)
|
||||
|
||||
e ->
|
||||
Logger.debug("Rich media error for #{url}: #{inspect(e)}")
|
||||
|
||||
:timer.sleep(@retry * attempt)
|
||||
|
||||
run(%{args | attempt: attempt + 1})
|
||||
end
|
||||
end
|
||||
|
||||
def run(%{url: url, url_hash: url_hash}) do
|
||||
Logger.debug("Rich media failure for #{url}")
|
||||
|
||||
negative_cache(url_hash, :timer.minutes(15))
|
||||
end
|
||||
|
||||
defp maybe_schedule_expiration(url, fields) do
|
||||
case TTL.get_from_image(fields, url) do
|
||||
ttl when is_number(ttl) ->
|
||||
timestamp = DateTime.from_unix!(ttl)
|
||||
|
||||
RichMediaExpirationWorker.new(%{"url" => url}, scheduled_at: timestamp)
|
||||
|> Oban.insert()
|
||||
|
||||
_ ->
|
||||
:ok
|
||||
end
|
||||
end
|
||||
|
||||
defp stream_update(%{activity_id: activity_id}) do
|
||||
Pleroma.Activity.get_by_id(activity_id)
|
||||
|> Pleroma.Activity.normalize()
|
||||
|> Pleroma.Web.ActivityPub.ActivityPub.stream_out()
|
||||
end
|
||||
|
||||
defp warm_cache(key, val), do: @cachex.put(:rich_media_cache, key, val)
|
||||
defp negative_cache(key, ttl \\ nil), do: @cachex.put(:rich_media_cache, key, nil, ttl: ttl)
|
||||
end
|
157
lib/pleroma/web/rich_media/card.ex
Normal file
157
lib/pleroma/web/rich_media/card.ex
Normal file
|
@ -0,0 +1,157 @@
|
|||
defmodule Pleroma.Web.RichMedia.Card do
|
||||
use Ecto.Schema
|
||||
import Ecto.Changeset
|
||||
import Ecto.Query
|
||||
|
||||
alias Pleroma.Activity
|
||||
alias Pleroma.HTML
|
||||
alias Pleroma.Object
|
||||
alias Pleroma.Repo
|
||||
alias Pleroma.Web.RichMedia.Backfill
|
||||
alias Pleroma.Web.RichMedia.Parser
|
||||
|
||||
@cachex Pleroma.Config.get([:cachex, :provider], Cachex)
|
||||
@config_impl Application.compile_env(:pleroma, [__MODULE__, :config_impl], Pleroma.Config)
|
||||
|
||||
@type t :: %__MODULE__{}
|
||||
|
||||
schema "rich_media_card" do
|
||||
field(:url_hash, :binary)
|
||||
field(:fields, :map)
|
||||
|
||||
timestamps()
|
||||
end
|
||||
|
||||
@doc false
|
||||
def changeset(card, attrs) do
|
||||
card
|
||||
|> cast(attrs, [:url_hash, :fields])
|
||||
|> validate_required([:url_hash, :fields])
|
||||
|> unique_constraint(:url_hash)
|
||||
end
|
||||
|
||||
@spec create(String.t(), map()) :: {:ok, t()}
|
||||
def create(url, fields) do
|
||||
url_hash = url_to_hash(url)
|
||||
|
||||
fields = Map.put_new(fields, "url", url)
|
||||
|
||||
%__MODULE__{}
|
||||
|> changeset(%{url_hash: url_hash, fields: fields})
|
||||
|> Repo.insert(on_conflict: {:replace, [:fields]}, conflict_target: :url_hash)
|
||||
end
|
||||
|
||||
@spec delete(String.t()) :: {:ok, Ecto.Schema.t()} | {:error, Ecto.Changeset.t()} | :ok
|
||||
def delete(url) do
|
||||
url_hash = url_to_hash(url)
|
||||
@cachex.del(:rich_media_cache, url_hash)
|
||||
|
||||
case get_by_url(url) do
|
||||
%__MODULE{} = card -> Repo.delete(card)
|
||||
nil -> :ok
|
||||
end
|
||||
end
|
||||
|
||||
@spec get_by_url(String.t() | nil) :: t() | nil | :error
|
||||
def get_by_url(url) when is_binary(url) do
|
||||
if @config_impl.get([:rich_media, :enabled]) do
|
||||
url_hash = url_to_hash(url)
|
||||
|
||||
@cachex.fetch!(:rich_media_cache, url_hash, fn _ ->
|
||||
result =
|
||||
__MODULE__
|
||||
|> where(url_hash: ^url_hash)
|
||||
|> Repo.one()
|
||||
|
||||
case result do
|
||||
%__MODULE__{} = card -> {:commit, card}
|
||||
_ -> {:ignore, nil}
|
||||
end
|
||||
end)
|
||||
else
|
||||
:error
|
||||
end
|
||||
end
|
||||
|
||||
def get_by_url(nil), do: nil
|
||||
|
||||
@spec get_or_backfill_by_url(String.t(), map()) :: t() | nil
|
||||
def get_or_backfill_by_url(url, backfill_opts \\ %{}) do
|
||||
case get_by_url(url) do
|
||||
%__MODULE__{} = card ->
|
||||
card
|
||||
|
||||
nil ->
|
||||
backfill_opts = Map.put(backfill_opts, :url, url)
|
||||
|
||||
Backfill.start(backfill_opts)
|
||||
|
||||
nil
|
||||
|
||||
:error ->
|
||||
nil
|
||||
end
|
||||
end
|
||||
|
||||
@spec get_by_object(Object.t()) :: t() | nil | :error
|
||||
def get_by_object(object) do
|
||||
case HTML.extract_first_external_url_from_object(object) do
|
||||
nil -> nil
|
||||
url -> get_or_backfill_by_url(url)
|
||||
end
|
||||
end
|
||||
|
||||
@spec get_by_activity(Activity.t()) :: t() | nil | :error
|
||||
# Fake/Draft activity
|
||||
def get_by_activity(%Activity{id: "pleroma:fakeid"} = activity) do
|
||||
with %Object{} = object <- Object.normalize(activity, fetch: false),
|
||||
url when not is_nil(url) <- HTML.extract_first_external_url_from_object(object) do
|
||||
case get_by_url(url) do
|
||||
# Cache hit
|
||||
%__MODULE__{} = card ->
|
||||
card
|
||||
|
||||
# Cache miss, but fetch for rendering the Draft
|
||||
_ ->
|
||||
with {:ok, fields} <- Parser.parse(url),
|
||||
{:ok, card} <- create(url, fields) do
|
||||
card
|
||||
else
|
||||
_ -> nil
|
||||
end
|
||||
end
|
||||
else
|
||||
_ ->
|
||||
nil
|
||||
end
|
||||
end
|
||||
|
||||
def get_by_activity(activity) do
|
||||
with %Object{} = object <- Object.normalize(activity, fetch: false),
|
||||
{_, nil} <- {:cached, get_cached_url(object, activity.id)} do
|
||||
nil
|
||||
else
|
||||
{:cached, url} ->
|
||||
get_or_backfill_by_url(url, %{activity_id: activity.id})
|
||||
|
||||
_ ->
|
||||
:error
|
||||
end
|
||||
end
|
||||
|
||||
@spec url_to_hash(String.t()) :: String.t()
|
||||
def url_to_hash(url) do
|
||||
:crypto.hash(:sha256, url) |> Base.encode16(case: :lower)
|
||||
end
|
||||
|
||||
defp get_cached_url(object, activity_id) do
|
||||
key = "URL|#{activity_id}"
|
||||
|
||||
@cachex.fetch!(:scrubber_cache, key, fn _ ->
|
||||
url = HTML.extract_first_external_url_from_object(object)
|
||||
Activity.HTML.add_cache_key_for(activity_id, key)
|
||||
|
||||
{:commit, url}
|
||||
end)
|
||||
end
|
||||
end
|
|
@ -3,85 +3,13 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-only
|
||||
|
||||
defmodule Pleroma.Web.RichMedia.Helpers do
|
||||
alias Pleroma.Activity
|
||||
alias Pleroma.Config
|
||||
alias Pleroma.HTML
|
||||
alias Pleroma.Object
|
||||
alias Pleroma.Web.RichMedia.Parser
|
||||
|
||||
@options [
|
||||
max_body: 2_000_000,
|
||||
receive_timeout: 2_000
|
||||
]
|
||||
|
||||
@spec validate_page_url(URI.t() | binary()) :: :ok | :error
|
||||
defp validate_page_url(page_url) when is_binary(page_url) do
|
||||
validate_tld = Config.get([Pleroma.Formatter, :validate_tld])
|
||||
|
||||
page_url
|
||||
|> Linkify.Parser.url?(validate_tld: validate_tld)
|
||||
|> parse_uri(page_url)
|
||||
end
|
||||
|
||||
defp validate_page_url(%URI{host: host, scheme: "https", authority: authority})
|
||||
when is_binary(authority) do
|
||||
cond do
|
||||
host in Config.get([:rich_media, :ignore_hosts], []) ->
|
||||
:error
|
||||
|
||||
get_tld(host) in Config.get([:rich_media, :ignore_tld], []) ->
|
||||
:error
|
||||
|
||||
true ->
|
||||
:ok
|
||||
end
|
||||
end
|
||||
|
||||
defp validate_page_url(_), do: :error
|
||||
|
||||
defp parse_uri(true, url) do
|
||||
url
|
||||
|> URI.parse()
|
||||
|> validate_page_url
|
||||
end
|
||||
|
||||
defp parse_uri(_, _), do: :error
|
||||
|
||||
defp get_tld(host) do
|
||||
host
|
||||
|> String.split(".")
|
||||
|> Enum.reverse()
|
||||
|> hd
|
||||
end
|
||||
|
||||
def fetch_data_for_object(object) do
|
||||
with true <- Config.get([:rich_media, :enabled]),
|
||||
{:ok, page_url} <-
|
||||
HTML.extract_first_external_url_from_object(object),
|
||||
:ok <- validate_page_url(page_url),
|
||||
{:ok, rich_media} <- Parser.parse(page_url) do
|
||||
%{page_url: page_url, rich_media: rich_media}
|
||||
else
|
||||
_ -> %{}
|
||||
end
|
||||
end
|
||||
|
||||
def fetch_data_for_activity(%Activity{data: %{"type" => "Create"}} = activity) do
|
||||
with true <- Config.get([:rich_media, :enabled]),
|
||||
%Object{} = object <- Object.normalize(activity, fetch: false) do
|
||||
fetch_data_for_object(object)
|
||||
else
|
||||
_ -> %{}
|
||||
end
|
||||
end
|
||||
|
||||
def fetch_data_for_activity(_), do: %{}
|
||||
|
||||
def rich_media_get(url) do
|
||||
headers = [{"user-agent", Pleroma.Application.user_agent() <> "; Bot"}]
|
||||
|
||||
head_check =
|
||||
case Pleroma.HTTP.head(url, headers, @options) do
|
||||
case Pleroma.HTTP.head(url, headers, http_options()) do
|
||||
# If the HEAD request didn't reach the server for whatever reason,
|
||||
# we assume the GET that comes right after won't either
|
||||
{:error, _} = e ->
|
||||
|
@ -96,7 +24,7 @@ def rich_media_get(url) do
|
|||
:ok
|
||||
end
|
||||
|
||||
with :ok <- head_check, do: Pleroma.HTTP.get(url, headers, @options)
|
||||
with :ok <- head_check, do: Pleroma.HTTP.get(url, headers, http_options())
|
||||
end
|
||||
|
||||
defp check_content_type(headers) do
|
||||
|
@ -112,12 +40,13 @@ defp check_content_type(headers) do
|
|||
end
|
||||
end
|
||||
|
||||
@max_body @options[:max_body]
|
||||
defp check_content_length(headers) do
|
||||
max_body = Keyword.get(http_options(), :max_body)
|
||||
|
||||
case List.keyfind(headers, "content-length", 0) do
|
||||
{_, maybe_content_length} ->
|
||||
case Integer.parse(maybe_content_length) do
|
||||
{content_length, ""} when content_length <= @max_body -> :ok
|
||||
{content_length, ""} when content_length <= max_body -> :ok
|
||||
{_, ""} -> {:error, :body_too_large}
|
||||
_ -> :ok
|
||||
end
|
||||
|
@ -126,4 +55,11 @@ defp check_content_length(headers) do
|
|||
:ok
|
||||
end
|
||||
end
|
||||
|
||||
defp http_options() do
|
||||
[
|
||||
pool: :media,
|
||||
max_body: Config.get([:rich_media, :max_body], 2_000_000)
|
||||
]
|
||||
end
|
||||
end
|
||||
|
|
|
@ -5,137 +5,28 @@
|
|||
defmodule Pleroma.Web.RichMedia.Parser do
|
||||
require Logger
|
||||
|
||||
@cachex Pleroma.Config.get([:cachex, :provider], Cachex)
|
||||
@config_impl Application.compile_env(:pleroma, [__MODULE__, :config_impl], Pleroma.Config)
|
||||
|
||||
defp parsers do
|
||||
Pleroma.Config.get([:rich_media, :parsers])
|
||||
end
|
||||
|
||||
def parse(nil), do: {:error, "No URL provided"}
|
||||
def parse(nil), do: nil
|
||||
|
||||
if Pleroma.Config.get(:env) == :test do
|
||||
@spec parse(String.t()) :: {:ok, map()} | {:error, any()}
|
||||
def parse(url), do: parse_with_timeout(url)
|
||||
else
|
||||
@spec parse(String.t()) :: {:ok, map()} | {:error, any()}
|
||||
def parse(url) do
|
||||
with {:ok, data} <- get_cached_or_parse(url),
|
||||
{:ok, _} <- set_ttl_based_on_image(data, url) do
|
||||
{:ok, data}
|
||||
end
|
||||
end
|
||||
|
||||
defp get_cached_or_parse(url) do
|
||||
case @cachex.fetch(:rich_media_cache, url, fn ->
|
||||
case parse_with_timeout(url) do
|
||||
{:ok, _} = res ->
|
||||
{:commit, res}
|
||||
|
||||
{:error, reason} = e ->
|
||||
# Unfortunately we have to log errors here, instead of doing that
|
||||
# along with ttl setting at the bottom. Otherwise we can get log spam
|
||||
# if more than one process was waiting for the rich media card
|
||||
# while it was generated. Ideally we would set ttl here as well,
|
||||
# so we don't override it number_of_waiters_on_generation
|
||||
# times, but one, obviously, can't set ttl for not-yet-created entry
|
||||
# and Cachex doesn't support returning ttl from the fetch callback.
|
||||
log_error(url, reason)
|
||||
{:commit, e}
|
||||
end
|
||||
end) do
|
||||
{action, res} when action in [:commit, :ok] ->
|
||||
case res do
|
||||
{:ok, _data} = res ->
|
||||
res
|
||||
|
||||
{:error, reason} = e ->
|
||||
if action == :commit, do: set_error_ttl(url, reason)
|
||||
e
|
||||
end
|
||||
|
||||
{:error, e} ->
|
||||
{:error, {:cachex_error, e}}
|
||||
end
|
||||
end
|
||||
|
||||
defp set_error_ttl(_url, :body_too_large), do: :ok
|
||||
defp set_error_ttl(_url, {:content_type, _}), do: :ok
|
||||
|
||||
# The TTL is not set for the errors above, since they are unlikely to change
|
||||
# with time
|
||||
|
||||
defp set_error_ttl(url, _reason) do
|
||||
ttl = Pleroma.Config.get([:rich_media, :failure_backoff], 60_000)
|
||||
@cachex.expire(:rich_media_cache, url, ttl)
|
||||
:ok
|
||||
end
|
||||
|
||||
defp log_error(url, {:invalid_metadata, data}) do
|
||||
Logger.debug(fn -> "Incomplete or invalid metadata for #{url}: #{inspect(data)}" end)
|
||||
end
|
||||
|
||||
defp log_error(url, reason) do
|
||||
Logger.warning(fn -> "Rich media error for #{url}: #{inspect(reason)}" end)
|
||||
@spec parse(String.t()) :: {:ok, map()} | {:error, any()}
|
||||
def parse(url) do
|
||||
with :ok <- validate_page_url(url),
|
||||
{:ok, data} <- parse_url(url) do
|
||||
data = Map.put(data, "url", url)
|
||||
{:ok, data}
|
||||
end
|
||||
end
|
||||
|
||||
@doc """
|
||||
Set the rich media cache based on the expiration time of image.
|
||||
|
||||
Adopt behaviour `Pleroma.Web.RichMedia.Parser.TTL`
|
||||
|
||||
## Example
|
||||
|
||||
defmodule MyModule do
|
||||
@behaviour Pleroma.Web.RichMedia.Parser.TTL
|
||||
def ttl(data, url) do
|
||||
image_url = Map.get(data, :image)
|
||||
# do some parsing in the url and get the ttl of the image
|
||||
# and return ttl is unix time
|
||||
parse_ttl_from_url(image_url)
|
||||
end
|
||||
end
|
||||
|
||||
Define the module in the config
|
||||
|
||||
config :pleroma, :rich_media,
|
||||
ttl_setters: [MyModule]
|
||||
"""
|
||||
@spec set_ttl_based_on_image(map(), String.t()) ::
|
||||
{:ok, Integer.t() | :noop} | {:error, :no_key}
|
||||
def set_ttl_based_on_image(data, url) do
|
||||
case get_ttl_from_image(data, url) do
|
||||
{:ok, ttl} when is_number(ttl) ->
|
||||
ttl = ttl * 1000
|
||||
|
||||
case @cachex.expire_at(:rich_media_cache, url, ttl) do
|
||||
{:ok, true} -> {:ok, ttl}
|
||||
{:ok, false} -> {:error, :no_key}
|
||||
end
|
||||
|
||||
_ ->
|
||||
{:ok, :noop}
|
||||
end
|
||||
end
|
||||
|
||||
defp get_ttl_from_image(data, url) do
|
||||
[:rich_media, :ttl_setters]
|
||||
|> Pleroma.Config.get()
|
||||
|> Enum.reduce({:ok, nil}, fn
|
||||
module, {:ok, _ttl} ->
|
||||
module.ttl(data, url)
|
||||
|
||||
_, error ->
|
||||
error
|
||||
end)
|
||||
end
|
||||
|
||||
def parse_url(url) do
|
||||
defp parse_url(url) do
|
||||
with {:ok, %Tesla.Env{body: html}} <- Pleroma.Web.RichMedia.Helpers.rich_media_get(url),
|
||||
{:ok, html} <- Floki.parse_document(html) do
|
||||
html
|
||||
|> maybe_parse()
|
||||
|> Map.put("url", url)
|
||||
|> clean_parsed_data()
|
||||
|> check_parsed_data()
|
||||
end
|
||||
|
|
|
@ -3,5 +3,17 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-only
|
||||
|
||||
defmodule Pleroma.Web.RichMedia.Parser.TTL do
|
||||
@callback ttl(Map.t(), String.t()) :: Integer.t() | nil
|
||||
@callback ttl(map(), String.t()) :: integer() | nil
|
||||
|
||||
def get_from_image(data, url) do
|
||||
[:rich_media, :ttl_setters]
|
||||
|> Pleroma.Config.get()
|
||||
|> Enum.reduce({:ok, nil}, fn
|
||||
module, {:ok, _ttl} ->
|
||||
module.ttl(data, url)
|
||||
|
||||
_, error ->
|
||||
error
|
||||
end)
|
||||
end
|
||||
end
|
||||
|
|
|
@ -7,7 +7,7 @@ defmodule Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl do
|
|||
|
||||
@impl true
|
||||
def ttl(data, _url) do
|
||||
image = Map.get(data, :image)
|
||||
image = Map.get(data, "image")
|
||||
|
||||
if is_aws_signed_url(image) do
|
||||
image
|
||||
|
|
15
lib/pleroma/workers/rich_media_expiration_worker.ex
Normal file
15
lib/pleroma/workers/rich_media_expiration_worker.ex
Normal file
|
@ -0,0 +1,15 @@
|
|||
# Pleroma: A lightweight social networking server
|
||||
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
|
||||
# SPDX-License-Identifier: AGPL-3.0-only
|
||||
|
||||
defmodule Pleroma.Workers.RichMediaExpirationWorker do
|
||||
alias Pleroma.Web.RichMedia.Card
|
||||
|
||||
use Oban.Worker,
|
||||
queue: :rich_media_expiration
|
||||
|
||||
@impl Oban.Worker
|
||||
def perform(%Job{args: %{"url" => url} = _args}) do
|
||||
Card.delete(url)
|
||||
end
|
||||
end
|
|
@ -0,0 +1,14 @@
|
|||
defmodule Pleroma.Repo.Migrations.CreateRichMediaCard do
|
||||
use Ecto.Migration
|
||||
|
||||
def change do
|
||||
create table(:rich_media_card) do
|
||||
add(:url_hash, :bytea)
|
||||
add(:fields, :map)
|
||||
|
||||
timestamps()
|
||||
end
|
||||
|
||||
create(unique_index(:rich_media_card, [:url_hash]))
|
||||
end
|
||||
end
|
Loading…
Reference in a new issue