From 92544e8f99658be53161caa8c4b5498ae2271d10 Mon Sep 17 00:00:00 2001 From: Oneric Date: Tue, 29 Oct 2024 01:41:54 +0100 Subject: [PATCH] Don't enqueue a plethora of unnecessary NodeInfoFetcher jobs There were two issues leading to needless effort: Most importantly, the use of AP IDs as "source_url" meant multiple simultaneous jobs got scheduled for the same instance even with the default unique settings. Also jobs were scheduled unconditionally for each processed AP object meaning we incurred overhead from managing Oban jobs even if we knew it wasn't necessary. By comparison the single query to check if an update is needed should be cheaper overall. --- lib/pleroma/instances/instance.ex | 8 +++++++ .../workers/nodeinfo_fetcher_worker.ex | 23 ++++++++++++++++++- .../web/activity_pub/side_effects_test.exs | 2 +- 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/lib/pleroma/instances/instance.ex b/lib/pleroma/instances/instance.ex index 5c70748b6..63362eb28 100644 --- a/lib/pleroma/instances/instance.ex +++ b/lib/pleroma/instances/instance.ex @@ -158,6 +158,14 @@ def needs_update(%Instance{metadata_updated_at: metadata_updated_at}) do NaiveDateTime.diff(now, metadata_updated_at) > 86_400 end + def needs_update(%URI{host: host}) do + with %Instance{} = instance <- Repo.get_by(Instance, %{host: host}) do + needs_update(instance) + else + _ -> true + end + end + def local do %Instance{ host: Pleroma.Web.Endpoint.host(), diff --git a/lib/pleroma/workers/nodeinfo_fetcher_worker.ex b/lib/pleroma/workers/nodeinfo_fetcher_worker.ex index 27492e1e3..32907bac9 100644 --- a/lib/pleroma/workers/nodeinfo_fetcher_worker.ex +++ b/lib/pleroma/workers/nodeinfo_fetcher_worker.ex @@ -1,9 +1,30 @@ defmodule Pleroma.Workers.NodeInfoFetcherWorker do - use Pleroma.Workers.WorkerHelper, queue: "nodeinfo_fetcher" + use Pleroma.Workers.WorkerHelper, + queue: "nodeinfo_fetcher", + unique: [ + keys: [:op, :source_url], + # old jobs still get pruned after a short while + period: 
:infinity, + states: Oban.Job.states() + ] alias Oban.Job alias Pleroma.Instances.Instance + def enqueue(op, %{"source_url" => ap_id} = params, worker_args) do + # reduce to base url to avoid enqueueing unneccessary duplicates + domain = + ap_id + |> URI.parse() + |> URI.merge("/") + + if Instance.needs_update(domain) do + do_enqueue(op, %{params | "source_url" => URI.to_string(domain)}, worker_args) + else + :ok + end + end + @impl Oban.Worker def perform(%Job{ args: %{"op" => "process", "source_url" => domain} diff --git a/test/pleroma/web/activity_pub/side_effects_test.exs b/test/pleroma/web/activity_pub/side_effects_test.exs index 28a591d3c..64a1fe6e6 100644 --- a/test/pleroma/web/activity_pub/side_effects_test.exs +++ b/test/pleroma/web/activity_pub/side_effects_test.exs @@ -46,7 +46,7 @@ test "it queues a fetch of instance information" do assert_enqueued( worker: Pleroma.Workers.NodeInfoFetcherWorker, - args: %{"op" => "process", "source_url" => "https://wowee.example.com/users/1"} + args: %{"op" => "process", "source_url" => "https://wowee.example.com/"} ) end end