diff --git a/lib/pleroma/job_queue_monitor.ex b/lib/pleroma/job_queue_monitor.ex index b5f124923..8d81ffcac 100644 --- a/lib/pleroma/job_queue_monitor.ex +++ b/lib/pleroma/job_queue_monitor.ex @@ -15,8 +15,19 @@ defmodule Pleroma.JobQueueMonitor do @impl true def init(state) do - :telemetry.attach("oban-monitor-failure", [:oban, :job, :exception], &handle_event/4, nil) - :telemetry.attach("oban-monitor-success", [:oban, :job, :stop], &handle_event/4, nil) + :telemetry.attach( + "oban-monitor-failure", + [:oban, :job, :exception], + &Pleroma.JobQueueMonitor.handle_event/4, + nil + ) + + :telemetry.attach( + "oban-monitor-success", + [:oban, :job, :stop], + &Pleroma.JobQueueMonitor.handle_event/4, + nil + ) {:ok, state} end diff --git a/lib/pleroma/web/telemetry.ex b/lib/pleroma/web/telemetry.ex index b03850600..269f9f238 100644 --- a/lib/pleroma/web/telemetry.ex +++ b/lib/pleroma/web/telemetry.ex @@ -101,7 +101,8 @@ defmodule Pleroma.Web.Telemetry do ] end - defp summary_metrics do + # Summary metrics are currently not (yet) supported by the prometheus exporter + defp summary_metrics(byte_unit) do [ # Phoenix Metrics summary("phoenix.endpoint.stop.duration", @@ -118,10 +119,98 @@ defmodule Pleroma.Web.Telemetry do summary("pleroma.repo.query.idle_time", unit: {:native, :millisecond}), # VM Metrics - summary("vm.memory.total", unit: {:byte, :kilobyte}), + summary("vm.memory.total", unit: {:byte, byte_unit}), summary("vm.total_run_queue_lengths.total"), summary("vm.total_run_queue_lengths.cpu"), - summary("vm.total_run_queue_lengths.io"), + summary("vm.total_run_queue_lengths.io") + ] + end + + defp sum_counter_pair(basename, opts) do + [ + sum(basename <> ".psum", opts), + counter(basename <> ".pcount", opts) + ] + end + + # Prometheus exporter doesn't support summaries, so provide fallbacks + defp summary_fallback_metrics(byte_unit \\ :byte) do + # Summary metrics are not supported by the Prometheus exporter + # https://github.com/beam-telemetry/telemetry_metrics_prometheus_core/issues/11 + # and sum metrics currently only work with integers + # https://github.com/beam-telemetry/telemetry_metrics_prometheus_core/issues/35 + # + # For VM metrics this is kindof ok as they appear to always be integers + # and we can use sum + counter to get the average between polls from their change + # But for repo query times we need to use a full distribution + + simple_buckets = [0, 1, 2, 4, 8, 16] + simple_buckets_quick = for t <- simple_buckets, do: t / 100.0 + + # Already included in distribution metrics anyway: + # phoenix.router_dispatch.stop.duration + # pleroma.repo.query.total_time + # pleroma.repo.query.queue_time + dist_metrics = + [ + distribution("phoenix.endpoint.stop.duration.fdist", + event_name: [:phoenix, :endpoint, :stop], + measurement: :duration, + unit: {:native, :millisecond}, + reporter_options: [ + buckets: simple_buckets + ] + ), + distribution("pleroma.repo.query.decode_time.fdist", + event_name: [:pleroma, :repo, :query], + measurement: :decode_time, + unit: {:native, :millisecond}, + reporter_options: [ + buckets: simple_buckets_quick + ] + ), + distribution("pleroma.repo.query.query_time.fdist", + event_name: [:pleroma, :repo, :query], + measurement: :query_time, + unit: {:native, :millisecond}, + reporter_options: [ + buckets: simple_buckets + ] + ), + distribution("pleroma.repo.query.idle_time.fdist", + event_name: [:pleroma, :repo, :query], + measurement: :idle_time, + unit: {:native, :millisecond}, + reporter_options: [ + buckets: simple_buckets + ] + ) + ] + + vm_metrics = + sum_counter_pair("vm.memory.total", + event_name: [:vm, :memory], + measurement: :total, + unit: {:byte, byte_unit} + ) ++ + sum_counter_pair("vm.total_run_queue_lengths.total", + event_name: [:vm, :total_run_queue_lengths], + measurement: :total + ) ++ + sum_counter_pair("vm.total_run_queue_lengths.cpu", + event_name: [:vm, :total_run_queue_lengths], + measurement: :cpu + ) ++ + sum_counter_pair("vm.total_run_queue_lengths.io.fsum", + event_name: [:vm, :total_run_queue_lengths], + measurement: :io + ) + + dist_metrics ++ vm_metrics + end + + defp common_metrics do + [ last_value("pleroma.local_users.total"), last_value("pleroma.domains.total"), last_value("pleroma.local_statuses.total"), @@ -129,8 +218,10 @@ defmodule Pleroma.Web.Telemetry do ] end - def prometheus_metrics, do: summary_metrics() ++ distribution_metrics() - def live_dashboard_metrics, do: summary_metrics() + def prometheus_metrics, + do: common_metrics() ++ distribution_metrics() ++ summary_fallback_metrics() + + def live_dashboard_metrics, do: common_metrics() ++ summary_metrics(:megabyte) defp periodic_measurements do [ diff --git a/priv/scrubbers/default.ex b/priv/scrubbers/default.ex index 6a97cbfd4..74de910fd 100644 --- a/priv/scrubbers/default.ex +++ b/priv/scrubbers/default.ex @@ -128,6 +128,4 @@ defmodule Pleroma.HTML.Scrubber.Default do Meta.allow_tag_with_these_attributes(:small, []) Meta.strip_everything_not_covered() - - defp scrub_css(value), do: value end