From a6df71eebb6fcb29460b510315048cea4cb32f07 Mon Sep 17 00:00:00 2001
From: Oneric <oneric@oneric.stub>
Date: Sat, 3 Feb 2024 17:30:00 +0100
Subject: [PATCH 1/5] Don't add summary metrics to prometheus
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The exporter doesn’t support them, so we don’t lose anything by this,
but it avoids a bunch of warnings each time the server starts up.
---
 lib/pleroma/web/telemetry.ex | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/lib/pleroma/web/telemetry.ex b/lib/pleroma/web/telemetry.ex
index b03850600..eecaffe88 100644
--- a/lib/pleroma/web/telemetry.ex
+++ b/lib/pleroma/web/telemetry.ex
@@ -101,6 +101,7 @@ defmodule Pleroma.Web.Telemetry do
     ]
   end
 
+  # Summary metrics are currently not (yet) supported by the prometheus exporter
   defp summary_metrics do
     [
       # Phoenix Metrics
@@ -121,7 +122,12 @@ defmodule Pleroma.Web.Telemetry do
       summary("vm.memory.total", unit: {:byte, :kilobyte}),
       summary("vm.total_run_queue_lengths.total"),
       summary("vm.total_run_queue_lengths.cpu"),
-      summary("vm.total_run_queue_lengths.io"),
+      summary("vm.total_run_queue_lengths.io")
+    ]
+  end
+
+  defp common_metrics do
+    [
       last_value("pleroma.local_users.total"),
       last_value("pleroma.domains.total"),
       last_value("pleroma.local_statuses.total"),
@@ -129,8 +135,8 @@ defmodule Pleroma.Web.Telemetry do
     ]
   end
 
-  def prometheus_metrics, do: summary_metrics() ++ distribution_metrics()
-  def live_dashboard_metrics, do: summary_metrics()
+  def prometheus_metrics, do: common_metrics() ++ distribution_metrics()
+  def live_dashboard_metrics, do: common_metrics() ++ summary_metrics()
 
   defp periodic_measurements do
     [
-- 
2.34.1


From 18ecae61839dd3d6e4d2aaaf2d0c01bc448fcfb5 Mon Sep 17 00:00:00 2001
From: Oneric <oneric@oneric.stub>
Date: Sat, 3 Feb 2024 17:51:40 +0100
Subject: [PATCH 2/5] Use fully qualified function capture for telemetry event

Otherwise we get warnings on startup as local captures
and anonymous functions are supposedly less performant.
---
 lib/pleroma/job_queue_monitor.ex | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/lib/pleroma/job_queue_monitor.ex b/lib/pleroma/job_queue_monitor.ex
index b5f124923..8d81ffcac 100644
--- a/lib/pleroma/job_queue_monitor.ex
+++ b/lib/pleroma/job_queue_monitor.ex
@@ -15,8 +15,19 @@ defmodule Pleroma.JobQueueMonitor do
 
   @impl true
   def init(state) do
-    :telemetry.attach("oban-monitor-failure", [:oban, :job, :exception], &handle_event/4, nil)
-    :telemetry.attach("oban-monitor-success", [:oban, :job, :stop], &handle_event/4, nil)
+    :telemetry.attach(
+      "oban-monitor-failure",
+      [:oban, :job, :exception],
+      &Pleroma.JobQueueMonitor.handle_event/4,
+      nil
+    )
+
+    :telemetry.attach(
+      "oban-monitor-success",
+      [:oban, :job, :stop],
+      &Pleroma.JobQueueMonitor.handle_event/4,
+      nil
+    )
 
     {:ok, state}
   end
-- 
2.34.1


From 8f8e1ff2145d4b48b6c45cf46bc8f85e6222c7c0 Mon Sep 17 00:00:00 2001
From: Oneric <oneric@oneric.stub>
Date: Thu, 8 Feb 2024 00:10:46 +0100
Subject: [PATCH 3/5] Purge unused function scrub_css

Commit e9f1897cfdb32c890e9eaf2e894128be5c7e1123 added this private
function but it never had any users, resulting in warnings on each startup.
---
 priv/scrubbers/default.ex | 2 --
 1 file changed, 2 deletions(-)

diff --git a/priv/scrubbers/default.ex b/priv/scrubbers/default.ex
index 6a97cbfd4..74de910fd 100644
--- a/priv/scrubbers/default.ex
+++ b/priv/scrubbers/default.ex
@@ -128,6 +128,4 @@ defmodule Pleroma.HTML.Scrubber.Default do
   Meta.allow_tag_with_these_attributes(:small, [])
 
   Meta.strip_everything_not_covered()
-
-  defp scrub_css(value), do: value
 end
-- 
2.34.1


From 16197ff57a181c4202317519b33d19826b53fbba Mon Sep 17 00:00:00 2001
From: Oneric <oneric@oneric.stub>
Date: Sat, 3 Feb 2024 18:21:09 +0100
Subject: [PATCH 4/5] Display memory as MB in live dashboard
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With kilobyte the resulting numbers got too large and were cut off
in the charts, making them useless. However, even an idle Akkoma
server’s memory usage is in the low hundreds of megabytes, so
we don’t need this much precision to begin with for the dashboard.

Other metric users might prefer base units and can handle scaling in a
smarter way, so keep this configurable.
---
 lib/pleroma/web/telemetry.ex | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/pleroma/web/telemetry.ex b/lib/pleroma/web/telemetry.ex
index eecaffe88..3ea88b31d 100644
--- a/lib/pleroma/web/telemetry.ex
+++ b/lib/pleroma/web/telemetry.ex
@@ -102,7 +102,7 @@ defmodule Pleroma.Web.Telemetry do
   end
 
   # Summary metrics are currently not (yet) supported by the prometheus exporter
-  defp summary_metrics do
+  defp summary_metrics(byte_unit) do
     [
       # Phoenix Metrics
       summary("phoenix.endpoint.stop.duration",
@@ -119,7 +119,7 @@ defmodule Pleroma.Web.Telemetry do
       summary("pleroma.repo.query.idle_time", unit: {:native, :millisecond}),
 
       # VM Metrics
-      summary("vm.memory.total", unit: {:byte, :kilobyte}),
+      summary("vm.memory.total", unit: {:byte, byte_unit}),
       summary("vm.total_run_queue_lengths.total"),
       summary("vm.total_run_queue_lengths.cpu"),
       summary("vm.total_run_queue_lengths.io")
@@ -136,7 +136,7 @@ defmodule Pleroma.Web.Telemetry do
   end
 
   def prometheus_metrics, do: common_metrics() ++ distribution_metrics()
-  def live_dashboard_metrics, do: common_metrics() ++ summary_metrics()
+  def live_dashboard_metrics, do: common_metrics() ++ summary_metrics(:megabyte)
 
   defp periodic_measurements do
     [
-- 
2.34.1


From 29f564f700e4c6998f0bbb830128e9d520cd1905 Mon Sep 17 00:00:00 2001
From: Oneric <oneric@oneric.stub>
Date: Sat, 3 Feb 2024 18:28:55 +0100
Subject: [PATCH 5/5] Use fallbacks of summary metrics for prometheus

---
 lib/pleroma/web/telemetry.ex | 87 +++++++++++++++++++++++++++++++++++-
 1 file changed, 86 insertions(+), 1 deletion(-)

diff --git a/lib/pleroma/web/telemetry.ex b/lib/pleroma/web/telemetry.ex
index 3ea88b31d..269f9f238 100644
--- a/lib/pleroma/web/telemetry.ex
+++ b/lib/pleroma/web/telemetry.ex
@@ -126,6 +126,89 @@ defmodule Pleroma.Web.Telemetry do
     ]
   end
 
+  # Stand-in for one summary metric: a ".psum" sum plus a ".pcount" counter.
+  defp sum_counter_pair(basename, opts) do
+    total = sum(basename <> ".psum", opts)
+    events = counter(basename <> ".pcount", opts)
+    [total, events]
+  end
+
+  # Prometheus fallbacks for summary metrics; `byte_unit` is the unit for vm.memory.total
+  defp summary_fallback_metrics(byte_unit \\ :byte) do
+    # Summary metrics are not supported by the Prometheus exporter
+    #   https://github.com/beam-telemetry/telemetry_metrics_prometheus_core/issues/11
+    # and sum metrics currently only work with integers
+    #   https://github.com/beam-telemetry/telemetry_metrics_prometheus_core/issues/35
+    #
+    # For VM metrics this is kind of ok as they appear to always be integers
+    # and we can use sum + counter to get the average between polls from their change.
+    # But for repo query times we need to use a full distribution.
+
+    simple_buckets = [0, 1, 2, 4, 8, 16]
+    simple_buckets_quick = for t <- simple_buckets, do: t / 100.0
+
+    # Already included in distribution metrics anyway:
+    #   phoenix.router_dispatch.stop.duration
+    #   pleroma.repo.query.total_time
+    #   pleroma.repo.query.queue_time
+    dist_metrics =
+      [
+        distribution("phoenix.endpoint.stop.duration.fdist",
+          event_name: [:phoenix, :endpoint, :stop],
+          measurement: :duration,
+          unit: {:native, :millisecond},
+          reporter_options: [
+            buckets: simple_buckets
+          ]
+        ),
+        distribution("pleroma.repo.query.decode_time.fdist",
+          event_name: [:pleroma, :repo, :query],
+          measurement: :decode_time,
+          unit: {:native, :millisecond},
+          reporter_options: [
+            buckets: simple_buckets_quick
+          ]
+        ),
+        distribution("pleroma.repo.query.query_time.fdist",
+          event_name: [:pleroma, :repo, :query],
+          measurement: :query_time,
+          unit: {:native, :millisecond},
+          reporter_options: [
+            buckets: simple_buckets
+          ]
+        ),
+        distribution("pleroma.repo.query.idle_time.fdist",
+          event_name: [:pleroma, :repo, :query],
+          measurement: :idle_time,
+          unit: {:native, :millisecond},
+          reporter_options: [
+            buckets: simple_buckets
+          ]
+        )
+      ]
+
+    vm_metrics =
+      sum_counter_pair("vm.memory.total",
+        event_name: [:vm, :memory],
+        measurement: :total,
+        unit: {:byte, byte_unit}
+      ) ++
+        sum_counter_pair("vm.total_run_queue_lengths.total",
+          event_name: [:vm, :total_run_queue_lengths],
+          measurement: :total
+        ) ++
+        sum_counter_pair("vm.total_run_queue_lengths.cpu",
+          event_name: [:vm, :total_run_queue_lengths],
+          measurement: :cpu
+        ) ++
+        sum_counter_pair("vm.total_run_queue_lengths.io",
+          event_name: [:vm, :total_run_queue_lengths],
+          measurement: :io
+        )
+
+    dist_metrics ++ vm_metrics
+  end
+
   defp common_metrics do
     [
       last_value("pleroma.local_users.total"),
@@ -135,7 +218,9 @@ defmodule Pleroma.Web.Telemetry do
     ]
   end
 
-  def prometheus_metrics, do: common_metrics() ++ distribution_metrics()
+  def prometheus_metrics,
+    do: common_metrics() ++ distribution_metrics() ++ summary_fallback_metrics()
+
   def live_dashboard_metrics, do: common_metrics() ++ summary_metrics(:megabyte)
 
   defp periodic_measurements do
-- 
2.34.1