From ad7dcf38a854ac762c812eae1ea5f8ba6b707cd6 Mon Sep 17 00:00:00 2001
From: FloatingGhost
Date: Fri, 15 Dec 2023 17:12:45 +0000
Subject: [PATCH] Add HTTP backoff cache to respect 429s

---
 lib/pleroma/application.ex    |  3 +-
 lib/pleroma/http/backoff.ex   | 94 +++++++++++++++++++++++++++++++++++
 lib/pleroma/object/fetcher.ex |  2 +-
 lib/pleroma/web/web_finger.ex |  5 +--
 4 files changed, 100 insertions(+), 4 deletions(-)
 create mode 100644 lib/pleroma/http/backoff.ex

diff --git a/lib/pleroma/application.ex b/lib/pleroma/application.ex
index 28a86d0aa..25fb11660 100644
--- a/lib/pleroma/application.ex
+++ b/lib/pleroma/application.ex
@@ -179,7 +179,8 @@ defp cachex_children do
       build_cachex("translations", default_ttl: :timer.hours(24 * 30), limit: 2500),
       build_cachex("instances", default_ttl: :timer.hours(24), ttl_interval: 1000, limit: 2500),
       build_cachex("request_signatures", default_ttl: :timer.hours(24 * 30), limit: 3000),
-      build_cachex("rel_me", default_ttl: :timer.hours(24 * 30), limit: 300)
+      build_cachex("rel_me", default_ttl: :timer.hours(24 * 30), limit: 300),
+      build_cachex("http_backoff", default_ttl: :timer.hours(24 * 30), limit: 10000)
     ]
   end
 
diff --git a/lib/pleroma/http/backoff.ex b/lib/pleroma/http/backoff.ex
new file mode 100644
index 000000000..d51c0547a
--- /dev/null
+++ b/lib/pleroma/http/backoff.ex
@@ -0,0 +1,94 @@
+defmodule Pleroma.HTTP.Backoff do
+  @moduledoc """
+  Wrapper around `Pleroma.HTTP.get/3` that stops talking to hosts which answered
+  with HTTP 429 until their rate limit is expected to have reset.
+  """
+
+  alias Pleroma.HTTP
+  require Logger
+
+  @cachex Pleroma.Config.get([:cachex, :provider], Cachex)
+  @backoff_cache :http_backoff_cache
+
+  # how long to back off when the 429 response gives no usable reset time
+  @default_backoff_seconds 5 * 60
+
+  defp default_backoff_timestamp do
+    Timex.shift(DateTime.utc_now(), seconds: @default_backoff_seconds)
+  end
+
+  # figure out from the 429 response when we can make the next request
+  # mastodon uses the x-ratelimit-reset header, so we will use that!
+  # other servers may not, so we'll default to 5 minutes from now if we can't find
+  # it or can't parse it as an ISO 8601 timestamp
+  defp next_backoff_timestamp(%{headers: headers}) when is_list(headers) do
+    reset =
+      Enum.find_value(headers, fn
+        # header names are case-insensitive, and the fallback clause keeps
+        # find_value from raising on every header that is not the one we want
+        {name, value} when is_binary(name) and is_binary(value) ->
+          if String.downcase(name) == "x-ratelimit-reset", do: value
+
+        _ ->
+          nil
+      end)
+
+    # DateTime.from_iso8601/1 returns {:ok, datetime, utc_offset} on success
+    with value when is_binary(value) <- reset,
+         {:ok, stamp, _offset} <- DateTime.from_iso8601(value) do
+      stamp
+    else
+      _ -> default_backoff_timestamp()
+    end
+  end
+
+  defp next_backoff_timestamp(_), do: default_backoff_timestamp()
+
+  # milliseconds to keep the host in the backoff cache
+  defp backoff_ttl(env) do
+    case Timex.diff(next_backoff_timestamp(env), DateTime.utc_now(), :milliseconds) do
+      ttl when is_integer(ttl) and ttl > 0 -> ttl
+      # the reset time is not in the future: still back off for the default window
+      _ -> :timer.seconds(@default_backoff_seconds)
+    end
+  end
+
+  @doc """
+  Performs a GET request through `Pleroma.HTTP.get/3`, unless the host of `url`
+  is currently backing off.
+
+  Returns `{:error, :ratelimit}` without making a request when the host is in
+  the backoff cache, and also when the host answers with a 429 (which puts it
+  into the cache). `{:ok, env}` and `{:error, reason}` results are passed through.
+  """
+  @spec get(String.t(), list(), list()) :: {:ok, map()} | {:error, term()}
+  def get(url, headers \\ [], options \\ []) do
+    # this acts as a single throughput for all GET requests
+    # we will check if the host is in the cache, and if it is, we will automatically fail the request
+    # this ensures that we don't hammer the server with requests, and instead wait for the backoff to expire
+    # this is a very simple implementation, and can be improved upon!
+    %{host: host} = URI.parse(url)
+
+    case @cachex.get(@backoff_cache, host) do
+      {:ok, nil} ->
+        case HTTP.get(url, headers, options) do
+          {:ok, %{status: 429} = env} ->
+            Logger.error("Rate limited on #{host}! Backing off...")
+            # cache the host until the backoff expires; Cachex wants the expiry
+            # as the `:ttl` option, in milliseconds
+            @cachex.put(@backoff_cache, host, true, ttl: backoff_ttl(env))
+            {:error, :ratelimit}
+
+          {:ok, env} ->
+            {:ok, env}
+
+          {:error, reason} ->
+            {:error, reason}
+        end
+
+      # the host is backing off (anything but a clean cache miss lands here)
+      _ ->
+        {:error, :ratelimit}
+    end
+  end
+end
diff --git a/lib/pleroma/object/fetcher.ex b/lib/pleroma/object/fetcher.ex
index b9d8dbaaa..937026e04 100644
--- a/lib/pleroma/object/fetcher.ex
+++ b/lib/pleroma/object/fetcher.ex
@@ -354,7 +354,7 @@ def get_object(id) do
 
     with {:ok, %{body: body, status: code, headers: headers, url: final_url}}
          when code in 200..299 <-
-           HTTP.get(id, headers),
+           HTTP.Backoff.get(id, headers),
          remote_host <-
            URI.parse(final_url).host,
          {:cross_domain_redirect, false} <-
diff --git a/lib/pleroma/web/web_finger.ex b/lib/pleroma/web/web_finger.ex
index 9d5efbb3e..280ed236e 100644
--- a/lib/pleroma/web/web_finger.ex
+++ b/lib/pleroma/web/web_finger.ex
@@ -160,7 +160,8 @@ def find_lrdd_template(domain) do
     # WebFinger is restricted to HTTPS - https://tools.ietf.org/html/rfc7033#section-9.1
     meta_url = "https://#{domain}/.well-known/host-meta"
 
-    with {:ok, %{status: status, body: body}} when status in 200..299 <- HTTP.get(meta_url) do
+    with {:ok, %{status: status, body: body}} when status in 200..299 <-
+           HTTP.Backoff.get(meta_url) do
       get_template_from_xml(body)
     else
       error ->
@@ -197,7 +198,7 @@ def finger(account) do
 
     with address when is_binary(address) <- get_address_from_domain(domain, encoded_account),
          {:ok, %{status: status, body: body, headers: headers}} when status in 200..299 <-
-           HTTP.get(
+           HTTP.Backoff.get(
              address,
              [{"accept", "application/xrd+xml,application/jrd+json"}]
            ) do