Don't enqueue a plethora of unnecessary NodeInfoFetcher jobs

There were two issues leading to needles effort:
Most importnatly, the use of AP IDs as "source_url" meant multiple
simultaneous jobs got scheduled for the same instance even with the
default unique settings.
Also jobs were scheduled uncontionally for each processed AP object
meaning we incured oberhead from managing Oban jobs even if we knew it
wasn't necessary. By comparison the single query to check if an update
is needed should be cheaper overall.
This commit is contained in:
Oneric 2024-10-29 01:41:54 +01:00
parent f289d85e60
commit 28a6d2830b
3 changed files with 31 additions and 2 deletions

View file

@ -158,6 +158,14 @@ def needs_update(%Instance{metadata_updated_at: metadata_updated_at}) do
NaiveDateTime.diff(now, metadata_updated_at) > 86_400
end
def needs_update(%URI{host: host}) do
with %Instance{} = instance <- Repo.get_by(Instance, %{host: host}) do
needs_update(instance)
else
_ -> true
end
end
def local do
%Instance{
host: Pleroma.Web.Endpoint.host(),

View file

@ -1,9 +1,30 @@
defmodule Pleroma.Workers.NodeInfoFetcherWorker do
use Pleroma.Workers.WorkerHelper, queue: "nodeinfo_fetcher"
use Pleroma.Workers.WorkerHelper,
queue: "nodeinfo_fetcher",
unique: [
keys: [:op, :source_url],
# old jobs still get pruned after a short while
period: :infinity,
states: Oban.Job.states()
]
alias Oban.Job
alias Pleroma.Instances.Instance
def enqueue(op, %{"source_url" => ap_id} = params, worker_args) do
# reduce to base url to avoid enqueueing unneccessary duplicates
domain =
ap_id
|> URI.parse()
|> URI.merge("/")
if Instance.needs_update(domain) do
do_enqueue(op, %{params | "source_url" => URI.to_string(domain)}, worker_args)
else
:ok
end
end
@impl Oban.Worker
def perform(%Job{
args: %{"op" => "process", "source_url" => domain}

View file

@ -46,7 +46,7 @@ test "it queues a fetch of instance information" do
assert_enqueued(
worker: Pleroma.Workers.NodeInfoFetcherWorker,
args: %{"op" => "process", "source_url" => "https://wowee.example.com/users/1"}
args: %{"op" => "process", "source_url" => "https://wowee.example.com/"}
)
end
end