Add HTTP backoff cache to respect 429s

FloatingGhost 2023-12-15 17:12:45 +00:00
parent 6fb91d79f3
commit 666d6238b3
4 changed files with 63 additions and 4 deletions


@@ -179,7 +179,8 @@ defmodule Pleroma.Application do
       build_cachex("translations", default_ttl: :timer.hours(24 * 30), limit: 2500),
       build_cachex("instances", default_ttl: :timer.hours(24), ttl_interval: 1000, limit: 2500),
       build_cachex("request_signatures", default_ttl: :timer.hours(24 * 30), limit: 3000),
-      build_cachex("rel_me", default_ttl: :timer.hours(24 * 30), limit: 300)
+      build_cachex("rel_me", default_ttl: :timer.hours(24 * 30), limit: 300),
+      build_cachex("http_backoff", default_ttl: :timer.hours(24 * 30), limit: 10000)
     ]
   end
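
Note: build_cachex/2 itself is not part of this diff; a rough sketch of the pre-existing helper (assumed from the surrounding Pleroma.Application module, shown only for context) explains why the "http_backoff" entry above ends up as the :http_backoff_cache atom used by the new module:

  # assumed shape of the existing helper, not code from this commit:
  # the cache registered as "http_backoff" is started under the name :http_backoff_cache
  defp build_cachex(type, opts),
    do: %{
      id: String.to_atom("cachex_" <> type),
      start: {Cachex, :start_link, [String.to_atom(type <> "_cache"), opts]},
      type: :worker
    }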


@@ -0,0 +1,57 @@
defmodule Pleroma.HTTP.Backoff do
  alias Pleroma.HTTP
  require Logger

  @cachex Pleroma.Config.get([:cachex, :provider], Cachex)
  @backoff_cache :http_backoff_cache

  defp next_backoff_timestamp(%{headers: headers}) when is_list(headers) do
    # figure out from the 429 response when we can make the next request
    # mastodon uses the x-ratelimit-reset header, so we will use that!
    # other servers may not, so we'll default to 5 minutes from now if we can't find it
    case Enum.find_value(headers, fn
           {"x-ratelimit-reset", value} -> value
           _ -> nil
         end) do
      nil ->
        DateTime.utc_now()
        |> Timex.shift(seconds: 5 * 60)

      value ->
        # mastodon sends an ISO8601 timestamp; from_iso8601/1 returns a three-element tuple
        {:ok, stamp, _offset} = DateTime.from_iso8601(value)
        stamp
    end
  end

  defp next_backoff_timestamp(_), do: DateTime.utc_now() |> Timex.shift(seconds: 5 * 60)

  def get(url, headers \\ [], options \\ []) do
    # this acts as a single chokepoint for all GET requests
    # we check whether the host is in the backoff cache, and if it is, we fail the request immediately
    # this ensures we don't hammer a rate-limited server with requests, and instead wait for the backoff to expire
    # this is a very simple implementation, and can be improved upon!
    %{host: host} = URI.parse(url)

    case @cachex.get(@backoff_cache, host) do
      {:ok, nil} ->
        case HTTP.get(url, headers, options) do
          {:ok, env} ->
            case env.status do
              429 ->
                Logger.error("Rate limited on #{host}! Backing off...")
                timestamp = next_backoff_timestamp(env)
                ttl = Timex.diff(timestamp, DateTime.utc_now(), :seconds)
                # cache the host until the advertised reset time (default: 5 minutes);
                # Cachex TTLs are in milliseconds, hence the conversion
                @cachex.put(@backoff_cache, host, true, ttl: :timer.seconds(ttl))
                {:error, env}

              _ ->
                {:ok, env}
            end

          {:error, env} ->
            {:error, env}
        end

      _ ->
        {:error, %Tesla.Env{status: 429, body: "Rate limited (internal backoff)"}}
    end
  end
end
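
A usage note, not part of the diff: Backoff.get/3 mirrors HTTP.get/3's return shape, so callers can swap it in directly. A minimal sketch of the intended flow, using a hypothetical rate-limiting host:

  # the first request reaches the remote; a 429 reply records the host in the backoff cache
  {:error, %Tesla.Env{status: 429}} =
    Pleroma.HTTP.Backoff.get("https://busy.example/objects/1")

  # until that cache entry expires, further requests to the same host fail locally,
  # without touching the network
  {:error, %Tesla.Env{status: 429, body: "Rate limited (internal backoff)"}} =
    Pleroma.HTTP.Backoff.get("https://busy.example/objects/2")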


@@ -275,7 +275,7 @@ defmodule Pleroma.Object.Fetcher do
       |> maybe_date_fetch(date)
       |> sign_fetch(id, date)

-    case HTTP.get(id, headers) do
+    case HTTP.Backoff.get(id, headers) do
       {:ok, %{body: body, status: code, headers: headers}} when code in 200..299 ->
         case List.keyfind(headers, "content-type", 0) do
           {_, content_type} ->


@@ -160,7 +160,8 @@ defmodule Pleroma.Web.WebFinger do
     # WebFinger is restricted to HTTPS - https://tools.ietf.org/html/rfc7033#section-9.1
     meta_url = "https://#{domain}/.well-known/host-meta"

-    with {:ok, %{status: status, body: body}} when status in 200..299 <- HTTP.get(meta_url) do
+    with {:ok, %{status: status, body: body}} when status in 200..299 <-
+           HTTP.Backoff.get(meta_url) do
       get_template_from_xml(body)
     else
       error ->
@@ -197,7 +198,7 @@
     with address when is_binary(address) <- get_address_from_domain(domain, encoded_account),
          {:ok, %{status: status, body: body, headers: headers}} when status in 200..299 <-
-           HTTP.get(
+           HTTP.Backoff.get(
             address,
             [{"accept", "application/xrd+xml,application/jrd+json"}]
           ) do
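
Because the backoff cache is keyed on host rather than on URL, a single 429 from a server pauses WebFinger, host-meta and object fetches to it alike. As an illustration (hypothetical host name, e.g. from an IEx session), an entry can also be seeded by hand:

  # any Backoff.get call to this host now short-circuits until the TTL runs out
  Cachex.put(:http_backoff_cache, "busy.example", true, ttl: :timer.minutes(5))

  {:error, %Tesla.Env{status: 429, body: "Rate limited (internal backoff)"}} =
    Pleroma.HTTP.Backoff.get("https://busy.example/.well-known/host-meta")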