Connection pool: Add client death tracking

While running this in production I noticed a number of ghost
processes with all their clients dead before they released the connection,
so let's track them to log it and remove them from clients
This commit is contained in:
rinpatch 2020-05-08 18:18:59 +03:00
parent e94ba05e52
commit 1b15cb066c
2 changed files with 45 additions and 2 deletions

View file

@ -20,7 +20,10 @@ defmodule Pleroma.Gun.ConnectionPool.Worker do
end) end)
send(client_pid, {:conn_pid, conn_pid}) send(client_pid, {:conn_pid, conn_pid})
{:ok, %{key: key, timer: nil}, :hibernate}
{:ok,
%{key: key, timer: nil, client_monitors: %{client_pid => Process.monitor(client_pid)}},
:hibernate}
else else
err -> {:stop, err} err -> {:stop, err}
end end
@ -45,6 +48,9 @@ defmodule Pleroma.Gun.ConnectionPool.Worker do
state state
end end
ref = Process.monitor(client_pid)
state = put_in(state.client_monitors[client_pid], ref)
{:noreply, state, :hibernate} {:noreply, state, :hibernate}
end end
@ -55,6 +61,9 @@ defmodule Pleroma.Gun.ConnectionPool.Worker do
{conn_pid, List.delete(used_by, client_pid), crf, last_reference} {conn_pid, List.delete(used_by, client_pid), crf, last_reference}
end) end)
{ref, state} = pop_in(state.client_monitors[client_pid])
Process.demonitor(ref)
timer = timer =
if used_by == [] do if used_by == [] do
max_idle = Pleroma.Config.get([:connections_pool, :max_idle_time], 30_000) max_idle = Pleroma.Config.get([:connections_pool, :max_idle_time], 30_000)
@ -85,6 +94,26 @@ defmodule Pleroma.Gun.ConnectionPool.Worker do
{:stop, {:error, down_message}, state} {:stop, {:error, down_message}, state}
end end
@impl true
def handle_info({:DOWN, _ref, :process, pid, reason}, state) do
# Sometimes the client is dead before we demonitor it in :remove_client, so the message
# arrives anyway
case state.client_monitors[pid] do
nil ->
{:noreply, state, :hibernate}
_ref ->
:telemetry.execute(
[:pleroma, :connection_pool, :client_death],
%{client_pid: pid, reason: reason},
%{key: state.key}
)
handle_cast({:remove_client, pid}, state)
end
end
# LRFU policy: https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.55.1478 # LRFU policy: https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.55.1478
defp crf(time_delta, prev_crf) do defp crf(time_delta, prev_crf) do
1 + :math.pow(0.5, time_delta / 100) * prev_crf 1 + :math.pow(0.5, time_delta / 100) * prev_crf

View file

@ -6,7 +6,8 @@ defmodule Pleroma.Telemetry.Logger do
@events [ @events [
[:pleroma, :connection_pool, :reclaim, :start], [:pleroma, :connection_pool, :reclaim, :start],
[:pleroma, :connection_pool, :reclaim, :stop], [:pleroma, :connection_pool, :reclaim, :stop],
[:pleroma, :connection_pool, :provision_failure] [:pleroma, :connection_pool, :provision_failure],
[:pleroma, :connection_pool, :client_death]
] ]
def attach do def attach do
:telemetry.attach_many("pleroma-logger", @events, &handle_event/4, []) :telemetry.attach_many("pleroma-logger", @events, &handle_event/4, [])
@ -59,4 +60,17 @@ defmodule Pleroma.Telemetry.Logger do
"Connection pool had to refuse opening a connection to #{key} due to connection limit exhaustion" "Connection pool had to refuse opening a connection to #{key} due to connection limit exhaustion"
end) end)
end end
def handle_event(
[:pleroma, :connection_pool, :client_death],
%{client_pid: client_pid, reason: reason},
%{key: key},
_
) do
Logger.warn(fn ->
"Pool worker for #{key}: Client #{inspect(client_pid)} died before releasing the connection with #{
inspect(reason)
}"
end)
end
end end