Add HTTP backoff cache to respect 429s

This commit is contained in:
FloatingGhost 2023-12-15 17:12:45 +00:00
parent 6fb91d79f3
commit 666d6238b3
4 changed files with 63 additions and 4 deletions

View file

@ -179,7 +179,8 @@ defp cachex_children do
build_cachex("translations", default_ttl: :timer.hours(24 * 30), limit: 2500),
build_cachex("instances", default_ttl: :timer.hours(24), ttl_interval: 1000, limit: 2500),
build_cachex("request_signatures", default_ttl: :timer.hours(24 * 30), limit: 3000),
build_cachex("rel_me", default_ttl: :timer.hours(24 * 30), limit: 300)
build_cachex("rel_me", default_ttl: :timer.hours(24 * 30), limit: 300),
build_cachex("http_backoff", default_ttl: :timer.hours(24 * 30), limit: 10000)
]
end

View file

@ -0,0 +1,57 @@
defmodule Pleroma.HTTP.Backoff do
  @moduledoc """
  Wrapper around `Pleroma.HTTP.get/3` that backs off from hosts which have
  answered with HTTP 429 (Too Many Requests).

  When a host responds with a 429, the host name is stored in the
  `:http_backoff_cache` cache until the time announced by the response (or a
  default window when the response does not say). While the entry is present,
  `get/3` fails immediately without issuing a request.
  """

  alias Pleroma.HTTP
  require Logger

  @cachex Pleroma.Config.get([:cachex, :provider], Cachex)
  @backoff_cache :http_backoff_cache

  # Backoff window, in seconds, used when the 429 response carries no usable
  # `x-ratelimit-reset` header.
  @default_backoff_seconds 5 * 60

  # Never cache a backoff entry for less than this many milliseconds. A reset
  # timestamp that already lies in the past would otherwise yield a zero or
  # negative TTL.
  @minimum_backoff_ms 1_000

  # The fallback "next allowed request" time: now plus the default window.
  defp default_backoff_timestamp do
    DateTime.utc_now()
    |> Timex.shift(seconds: @default_backoff_seconds)
  end

  # Figure out from the 429 response when we may make the next request.
  # Mastodon uses the `x-ratelimit-reset` header (an ISO 8601 timestamp), so
  # we use that. Other servers may not send it, or may send something we
  # cannot parse; in both cases we fall back to the default window instead of
  # crashing.
  defp next_backoff_timestamp(%{headers: headers}) when is_list(headers) do
    with {_name, value} when is_binary(value) <-
           List.keyfind(headers, "x-ratelimit-reset", 0),
         # from_iso8601/1 returns a 3-tuple: {:ok, datetime, utc_offset}
         {:ok, stamp, _offset} <- DateTime.from_iso8601(value) do
      stamp
    else
      _ -> default_backoff_timestamp()
    end
  end

  defp next_backoff_timestamp(_), do: default_backoff_timestamp()

  # Milliseconds from now until the backoff for this response expires,
  # clamped to @minimum_backoff_ms.
  defp backoff_ttl(env) do
    env
    |> next_backoff_timestamp()
    |> DateTime.diff(DateTime.utc_now(), :millisecond)
    |> max(@minimum_backoff_ms)
  end

  @doc """
  Performs a GET request through `Pleroma.HTTP.get/3`, honouring any active
  backoff for the target host.

  Returns:

    * `{:ok, env}` when the request was made and the status is not 429;
    * `{:error, env}` when the remote answered 429 (the host is then put into
      the backoff cache) or when `Pleroma.HTTP.get/3` itself returned an error;
    * `{:error, %Tesla.Env{status: 429}}` with a synthetic body when the host
      is currently backed off and no request was made.
  """
  def get(url, headers \\ [], options \\ []) do
    # All GET requests funnel through here: if the host is in the backoff
    # cache we fail the request right away, so we do not hammer a server that
    # has asked us to slow down.
    %{host: host} = URI.parse(url)

    case @cachex.get(@backoff_cache, host) do
      {:ok, nil} ->
        case HTTP.get(url, headers, options) do
          {:ok, %{status: 429} = env} ->
            Logger.error("Rate limited on #{host}! Backing off...")
            # The TTL must be passed as the `:ttl` option, in milliseconds.
            @cachex.put(@backoff_cache, host, true, ttl: backoff_ttl(env))
            {:error, env}

          {:ok, env} ->
            {:ok, env}

          {:error, env} ->
            {:error, env}
        end

      _ ->
        {:error, %Tesla.Env{status: 429, body: "Rate limited (internal backoff)"}}
    end
  end
end

View file

@ -275,7 +275,7 @@ def get_object(id) do
|> maybe_date_fetch(date)
|> sign_fetch(id, date)
case HTTP.get(id, headers) do
case HTTP.Backoff.get(id, headers) do
{:ok, %{body: body, status: code, headers: headers}} when code in 200..299 ->
case List.keyfind(headers, "content-type", 0) do
{_, content_type} ->

View file

@ -160,7 +160,8 @@ def find_lrdd_template(domain) do
# WebFinger is restricted to HTTPS - https://tools.ietf.org/html/rfc7033#section-9.1
meta_url = "https://#{domain}/.well-known/host-meta"
with {:ok, %{status: status, body: body}} when status in 200..299 <- HTTP.get(meta_url) do
with {:ok, %{status: status, body: body}} when status in 200..299 <-
HTTP.Backoff.get(meta_url) do
get_template_from_xml(body)
else
error ->
@ -197,7 +198,7 @@ def finger(account) do
with address when is_binary(address) <- get_address_from_domain(domain, encoded_account),
{:ok, %{status: status, body: body, headers: headers}} when status in 200..299 <-
HTTP.get(
HTTP.Backoff.get(
address,
[{"accept", "application/xrd+xml,application/jrd+json"}]
) do