Add HTTP backoff cache to respect 429s

This commit is contained in:
FloatingGhost 2023-12-15 17:12:45 +00:00 committed by Floatingghost
parent 12db5c23f2
commit ad7dcf38a8
4 changed files with 63 additions and 4 deletions

View file

@ -179,7 +179,8 @@ defmodule Pleroma.Application do
build_cachex("translations", default_ttl: :timer.hours(24 * 30), limit: 2500),
build_cachex("instances", default_ttl: :timer.hours(24), ttl_interval: 1000, limit: 2500),
build_cachex("request_signatures", default_ttl: :timer.hours(24 * 30), limit: 3000),
build_cachex("rel_me", default_ttl: :timer.hours(24 * 30), limit: 300)
build_cachex("rel_me", default_ttl: :timer.hours(24 * 30), limit: 300),
build_cachex("http_backoff", default_ttl: :timer.hours(24 * 30), limit: 10000)
]
end

View file

@ -0,0 +1,57 @@
defmodule Pleroma.HTTP.Backoff do
  @moduledoc """
  GET wrapper around `Pleroma.HTTP` that backs off from hosts which answered
  with HTTP 429 (Too Many Requests).

  When a host responds with 429, the host name is stored in the
  `:http_backoff_cache` cache until the rate limit is expected to reset.
  While the entry exists, `get/3` fails immediately with
  `{:error, :ratelimit}` instead of issuing another request to that host.
  """

  alias Pleroma.HTTP
  require Logger

  @cachex Pleroma.Config.get([:cachex, :provider], Cachex)
  @backoff_cache :http_backoff_cache

  # Backoff window used when the response gives no usable reset time.
  @default_backoff_seconds 5 * 60

  # Lower bound for the cache TTL (milliseconds), so that a reset time that is
  # already in the past (e.g. clock skew) never produces a non-positive TTL.
  @minimum_ttl_ms 1_000

  # Mastodon announces the reset time in this header as an ISO 8601 timestamp.
  @ratelimit_reset_header "x-ratelimit-reset"

  @doc """
  Performs a GET request to `url` unless its host is currently backed off.

  Returns:

    * `{:ok, env}` for any response whose status is not 429
    * `{:error, :ratelimit}` when the host is backed off, or when this request
      was answered with 429 (the host is then cached as backed off)
    * `{:error, reason}` when the underlying `Pleroma.HTTP.get/3` call fails
  """
  @spec get(String.t(), list(), list()) :: {:ok, term()} | {:error, term()}
  def get(url, headers \\ [], options \\ []) do
    %{host: host} = URI.parse(url)

    case @cachex.get(@backoff_cache, host) do
      {:ok, nil} ->
        request(url, host, headers, options)

      # Any other cache result (an existing entry, but also a cache error) is
      # treated as "still backing off".
      _ ->
        {:error, :ratelimit}
    end
  end

  # Issues the actual request and records a backoff entry on a 429 response.
  defp request(url, host, headers, options) do
    case HTTP.get(url, headers, options) do
      {:ok, %{status: 429} = env} ->
        Logger.error("Rate limited on #{host}! Backing off...")
        # The cache provider's put/4 takes its options as a keyword list, and
        # Cachex TTLs are expressed in milliseconds.
        @cachex.put(@backoff_cache, host, true, ttl: backoff_ttl_ms(env))
        {:error, :ratelimit}

      {:ok, env} ->
        {:ok, env}

      {:error, reason} ->
        {:error, reason}
    end
  end

  # Milliseconds from now until the backoff for this response should end.
  defp backoff_ttl_ms(env) do
    env
    |> next_backoff_timestamp()
    |> DateTime.diff(DateTime.utc_now(), :millisecond)
    |> max(@minimum_ttl_ms)
  end

  # Figures out from the 429 response when the next request may be made.
  # Falls back to the default window when the reset header is missing or its
  # value cannot be parsed as an ISO 8601 timestamp.
  defp next_backoff_timestamp(%{headers: headers}) when is_list(headers) do
    # DateTime.from_iso8601/1 returns a 3-tuple: {:ok, datetime, utc_offset}.
    with value when is_binary(value) <- ratelimit_reset(headers),
         {:ok, stamp, _utc_offset} <- DateTime.from_iso8601(value) do
      stamp
    else
      _ -> default_backoff_timestamp()
    end
  end

  defp next_backoff_timestamp(_), do: default_backoff_timestamp()

  # Returns the value of the rate-limit reset header, or nil when absent.
  # The catch-all clause is required: without it every header with a
  # different name raises FunctionClauseError.
  defp ratelimit_reset(headers) do
    Enum.find_value(headers, fn
      {name, value} when is_binary(name) and is_binary(value) ->
        if String.downcase(name) == @ratelimit_reset_header, do: value

      _ ->
        nil
    end)
  end

  defp default_backoff_timestamp do
    DateTime.add(DateTime.utc_now(), @default_backoff_seconds, :second)
  end
end

View file

@ -354,7 +354,7 @@ defmodule Pleroma.Object.Fetcher do
with {:ok, %{body: body, status: code, headers: headers, url: final_url}}
when code in 200..299 <-
HTTP.get(id, headers),
HTTP.Backoff.get(id, headers),
remote_host <-
URI.parse(final_url).host,
{:cross_domain_redirect, false} <-

View file

@ -160,7 +160,8 @@ defmodule Pleroma.Web.WebFinger do
# WebFinger is restricted to HTTPS - https://tools.ietf.org/html/rfc7033#section-9.1
meta_url = "https://#{domain}/.well-known/host-meta"
with {:ok, %{status: status, body: body}} when status in 200..299 <- HTTP.get(meta_url) do
with {:ok, %{status: status, body: body}} when status in 200..299 <-
HTTP.Backoff.get(meta_url) do
get_template_from_xml(body)
else
error ->
@ -197,7 +198,7 @@ defmodule Pleroma.Web.WebFinger do
with address when is_binary(address) <- get_address_from_domain(domain, encoded_account),
{:ok, %{status: status, body: body, headers: headers}} when status in 200..299 <-
HTTP.get(
HTTP.Backoff.get(
address,
[{"accept", "application/xrd+xml,application/jrd+json"}]
) do