From c17681ae1e4df46a6626d9d7be58bbfefeefe537 Mon Sep 17 00:00:00 2001
From: "Haelwenn (lanodan) Monnier"
Date: Fri, 5 May 2023 10:53:09 +0200
Subject: [PATCH 01/37] Purge obsolete ap_enabled indicator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It was used to migrate OStatus connections to ActivityPub if possible,
but support for OStatus has long since been dropped, all new actors are
always AP, and if anything wasn't migrated before, its instance is
already marked as unreachable anyway.

The associated logic was also buggy in several ways, and deleted users
got set to ap_enabled=false, which also caused some issues.

This patch is a pretty direct port of the original Pleroma MR;
follow-up commits will further fix and clean up remaining issues.

Changes made (other than trivial merge conflict resolutions):
- converted CHANGELOG format
- adapted migration id for Akkoma’s timeline
- removed ap_enabled from additional tests

Ported-from: https://git.pleroma.social/pleroma/pleroma/-/merge_requests/3880

---
 CHANGELOG.md                                  |  7 ++
 lib/pleroma/user.ex                           | 13 +---
 lib/pleroma/web/activity_pub/activity_pub.ex  | 45 ++++++-------
 .../object_validators/add_remove_validator.ex |  3 +-
 lib/pleroma/web/activity_pub/publisher.ex     |  2 -
 .../web/activity_pub/transmogrifier.ex        | 42 ------------
 lib/pleroma/web/federator.ex                  | 13 +---
 lib/pleroma/workers/transmogrifier_worker.ex  | 15 -----
 .../20241213000000_remove_user_ap_enabled.exs | 13 ++++
 test/pleroma/user_test.exs                    | 11 +---
 .../activity_pub_controller_test.exs          |  1 -
 .../web/activity_pub/activity_pub_test.exs    |  1 -
 .../web/activity_pub/publisher_test.exs       | 18 ++----
 .../web/activity_pub/transmogrifier_test.exs  | 64 -------------------
 test/pleroma/web/common_api_test.exs          |  2 +-
 test/pleroma/web/federator_test.exs           |  6 +-
 test/support/factory.ex                       |  3 +-
 17 files changed, 57 insertions(+), 202 deletions(-)
 delete mode 100644 lib/pleroma/workers/transmogrifier_worker.ex
 create mode 100644 priv/repo/migrations/20241213000000_remove_user_ap_enabled.exs

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 42ed630fb..b0448b0d8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

 ## Unreleased

+## Added
+
+## Fixed
+
+## Changed
+- Dropped obsolete `ap_enabled` indicator from user table and associated buggy logic
+
 ## 2025.01.01

 Hotfix: Federation could break if a null value found its way into `should_federate?\1`

diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex
index dfeab0410..3042d86f2 100644
--- a/lib/pleroma/user.ex
+++ b/lib/pleroma/user.ex
@@ -127,7 +127,6 @@ defmodule Pleroma.User do
     field(:domain_blocks, {:array, :string}, default: [])
     field(:is_active, :boolean, default: true)
     field(:no_rich_text, :boolean, default: false)
-    field(:ap_enabled, :boolean, default: false)
     field(:is_moderator, :boolean, default: false)
     field(:is_admin, :boolean, default: false)
     field(:show_role, :boolean, default: true)
@@ -473,7 +472,6 @@ def remote_user_changeset(struct \\ %User{local: false}, params) do
         :shared_inbox,
         :nickname,
         :avatar,
-        :ap_enabled,
         :banner,
         :background,
         :is_locked,
@@ -1006,11 +1004,7 @@ def maybe_direct_follow(%User{} = follower, %User{local: true} = followed) do
   end

   def maybe_direct_follow(%User{} = follower, %User{} = followed) do
-    if not ap_enabled?(followed) do
-      follow(follower, followed)
-    else
-      {:ok, follower, followed}
-    end
+    {:ok, follower, followed}
   end

   @doc "A mass follow for local users.
Respects blocks in both directions but does not create activities." @@ -1826,7 +1820,6 @@ def purge_user_changeset(user) do confirmation_token: nil, domain_blocks: [], is_active: false, - ap_enabled: false, is_moderator: false, is_admin: false, mastofe_settings: nil, @@ -2073,10 +2066,6 @@ def get_public_key_for_ap_id(ap_id) do end end - def ap_enabled?(%User{local: true}), do: true - def ap_enabled?(%User{ap_enabled: ap_enabled}), do: ap_enabled - def ap_enabled?(_), do: false - @doc "Gets or fetch a user by uri or nickname." @spec get_or_fetch(String.t()) :: {:ok, User.t()} | {:error, String.t()} def get_or_fetch("http://" <> _host = uri), do: get_or_fetch_by_ap_id(uri) diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex index 9b28e64d9..494a27421 100644 --- a/lib/pleroma/web/activity_pub/activity_pub.ex +++ b/lib/pleroma/web/activity_pub/activity_pub.ex @@ -1626,7 +1626,6 @@ defp object_to_user_data(data, additional) do %{ ap_id: data["id"], uri: get_actor_url(data["url"]), - ap_enabled: true, banner: normalize_image(data["image"]), background: normalize_image(data["backgroundUrl"]), fields: fields, @@ -1743,7 +1742,7 @@ def user_data_from_user_object(data, additional \\ []) do end end - def fetch_and_prepare_user_from_ap_id(ap_id, additional \\ []) do + defp fetch_and_prepare_user_from_ap_id(ap_id, additional) do with {:ok, data} <- Fetcher.fetch_and_contain_remote_object_from_id(ap_id), {:valid, {:ok, _, _}} <- {:valid, UserValidator.validate(data, [])}, {:ok, data} <- user_data_from_user_object(data, additional) do @@ -1857,31 +1856,27 @@ def enqueue_pin_fetches(_), do: nil def make_user_from_ap_id(ap_id, additional \\ []) do user = User.get_cached_by_ap_id(ap_id) - if user && !User.ap_enabled?(user) do - Transmogrifier.upgrade_user_from_ap_id(ap_id) - else - with {:ok, data} <- fetch_and_prepare_user_from_ap_id(ap_id, additional) do - user = - if data.ap_id != ap_id do - User.get_cached_by_ap_id(data.ap_id) - else - user - end - - if user do - user - |> User.remote_user_changeset(data) - |> User.update_and_set_cache() - |> tap(fn _ -> enqueue_pin_fetches(data) end) + with {:ok, data} <- fetch_and_prepare_user_from_ap_id(ap_id, additional) do + user = + if data.ap_id != ap_id do + User.get_cached_by_ap_id(data.ap_id) else - maybe_handle_clashing_nickname(data) - - data - |> User.remote_user_changeset() - |> Repo.insert() - |> User.set_cache() - |> tap(fn _ -> enqueue_pin_fetches(data) end) + user end + + if user do + user + |> User.remote_user_changeset(data) + |> User.update_and_set_cache() + |> tap(fn _ -> enqueue_pin_fetches(data) end) + else + maybe_handle_clashing_nickname(data) + + data + |> User.remote_user_changeset() + |> Repo.insert() + |> User.set_cache() + |> tap(fn _ -> enqueue_pin_fetches(data) end) end end end diff --git a/lib/pleroma/web/activity_pub/object_validators/add_remove_validator.ex b/lib/pleroma/web/activity_pub/object_validators/add_remove_validator.ex index fc482c9c0..b2fa35831 100644 --- a/lib/pleroma/web/activity_pub/object_validators/add_remove_validator.ex +++ b/lib/pleroma/web/activity_pub/object_validators/add_remove_validator.ex @@ -73,6 +73,7 @@ defp maybe_refetch_user(%User{featured_address: address} = user) when is_binary( end defp maybe_refetch_user(%User{ap_id: ap_id}) do - Pleroma.Web.ActivityPub.Transmogrifier.upgrade_user_from_ap_id(ap_id) + # Maybe it could use User.get_or_fetch_by_ap_id to avoid refreshing too often + User.fetch_by_ap_id(ap_id) end end diff --git 
a/lib/pleroma/web/activity_pub/publisher.ex b/lib/pleroma/web/activity_pub/publisher.ex index 07f430805..056de24bc 100644 --- a/lib/pleroma/web/activity_pub/publisher.ex +++ b/lib/pleroma/web/activity_pub/publisher.ex @@ -219,7 +219,6 @@ def publish(%User{} = actor, %{data: %{"bcc" => bcc}} = activity) inboxes = recipients - |> Enum.filter(&User.ap_enabled?/1) |> Enum.map(fn actor -> actor.inbox end) |> Enum.filter(fn inbox -> should_federate?(inbox) end) |> Instances.filter_reachable() @@ -261,7 +260,6 @@ def publish(%User{} = actor, %Activity{} = activity) do json = Jason.encode!(data) recipients(actor, activity) - |> Enum.filter(fn user -> User.ap_enabled?(user) end) |> Enum.map(fn %User{} = user -> determine_inbox(activity, user) end) diff --git a/lib/pleroma/web/activity_pub/transmogrifier.ex b/lib/pleroma/web/activity_pub/transmogrifier.ex index 5c4db39b9..251ba6540 100644 --- a/lib/pleroma/web/activity_pub/transmogrifier.ex +++ b/lib/pleroma/web/activity_pub/transmogrifier.ex @@ -21,7 +21,6 @@ defmodule Pleroma.Web.ActivityPub.Transmogrifier do alias Pleroma.Web.ActivityPub.Visibility alias Pleroma.Web.ActivityPub.ObjectValidators.CommonFixes alias Pleroma.Web.Federator - alias Pleroma.Workers.TransmogrifierWorker import Ecto.Query @@ -1007,47 +1006,6 @@ defp strip_internal_tags(%{"tag" => tags} = object) do defp strip_internal_tags(object), do: object - def perform(:user_upgrade, user) do - # we pass a fake user so that the followers collection is stripped away - old_follower_address = User.ap_followers(%User{nickname: user.nickname}) - - from( - a in Activity, - where: ^old_follower_address in a.recipients, - update: [ - set: [ - recipients: - fragment( - "array_replace(?,?,?)", - a.recipients, - ^old_follower_address, - ^user.follower_address - ) - ] - ] - ) - |> Repo.update_all([]) - end - - def upgrade_user_from_ap_id(ap_id) do - with %User{local: false} = user <- User.get_cached_by_ap_id(ap_id), - {:ok, data} <- ActivityPub.fetch_and_prepare_user_from_ap_id(ap_id), - {:ok, user} <- update_user(user, data) do - ActivityPub.enqueue_pin_fetches(user) - TransmogrifierWorker.enqueue("user_upgrade", %{"user_id" => user.id}) - {:ok, user} - else - %User{} = user -> {:ok, user} - e -> e - end - end - - defp update_user(user, data) do - user - |> User.remote_user_changeset(data) - |> User.update_and_set_cache() - end - def maybe_fix_user_url(%{"url" => url} = data) when is_map(url) do Map.put(data, "url", url["href"]) end diff --git a/lib/pleroma/web/federator.ex b/lib/pleroma/web/federator.ex index 3a00424c6..aaf89412f 100644 --- a/lib/pleroma/web/federator.ex +++ b/lib/pleroma/web/federator.ex @@ -6,7 +6,6 @@ defmodule Pleroma.Web.Federator do alias Pleroma.Activity alias Pleroma.Object.Containment alias Pleroma.User - alias Pleroma.Web.ActivityPub.ActivityPub alias Pleroma.Web.ActivityPub.Transmogrifier alias Pleroma.Web.ActivityPub.Utils alias Pleroma.Web.Federator.Publisher @@ -92,7 +91,7 @@ def perform(:incoming_ap_doc, params) do # NOTE: we use the actor ID to do the containment, this is fine because an # actor shouldn't be acting on objects outside their own AP server. 
- with {_, {:ok, _user}} <- {:actor, ap_enabled_actor(actor)}, + with {_, {:ok, _user}} <- {:actor, User.get_or_fetch_by_ap_id(actor)}, nil <- Activity.normalize(params["id"]), {_, :ok} <- {:correct_origin?, Containment.contain_origin_from_id(actor, params)}, @@ -122,14 +121,4 @@ def perform(:incoming_ap_doc, params) do {:error, e} end end - - def ap_enabled_actor(id) do - user = User.get_cached_by_ap_id(id) - - if User.ap_enabled?(user) do - {:ok, user} - else - ActivityPub.make_user_from_ap_id(id) - end - end end diff --git a/lib/pleroma/workers/transmogrifier_worker.ex b/lib/pleroma/workers/transmogrifier_worker.ex deleted file mode 100644 index b39c1ea62..000000000 --- a/lib/pleroma/workers/transmogrifier_worker.ex +++ /dev/null @@ -1,15 +0,0 @@ -# Pleroma: A lightweight social networking server -# Copyright © 2017-2021 Pleroma Authors -# SPDX-License-Identifier: AGPL-3.0-only - -defmodule Pleroma.Workers.TransmogrifierWorker do - alias Pleroma.User - - use Pleroma.Workers.WorkerHelper, queue: "transmogrifier" - - @impl Oban.Worker - def perform(%Job{args: %{"op" => "user_upgrade", "user_id" => user_id}}) do - user = User.get_cached_by_id(user_id) - Pleroma.Web.ActivityPub.Transmogrifier.perform(:user_upgrade, user) - end -end diff --git a/priv/repo/migrations/20241213000000_remove_user_ap_enabled.exs b/priv/repo/migrations/20241213000000_remove_user_ap_enabled.exs new file mode 100644 index 000000000..0aea41324 --- /dev/null +++ b/priv/repo/migrations/20241213000000_remove_user_ap_enabled.exs @@ -0,0 +1,13 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2023 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Repo.Migrations.RemoveUserApEnabled do + use Ecto.Migration + + def change do + alter table(:users) do + remove(:ap_enabled, :boolean, default: false, null: false) + end + end +end diff --git a/test/pleroma/user_test.exs b/test/pleroma/user_test.exs index ac886aaf9..0d5c9faec 100644 --- a/test/pleroma/user_test.exs +++ b/test/pleroma/user_test.exs @@ -1694,7 +1694,6 @@ test "delete/1 purges a user when they wouldn't be fully deleted" do confirmation_token: "qqqq", domain_blocks: ["lain.com"], is_active: false, - ap_enabled: true, is_moderator: true, is_admin: true, mastofe_settings: %{"a" => "b"}, @@ -1734,7 +1733,6 @@ test "delete/1 purges a user when they wouldn't be fully deleted" do confirmation_token: nil, domain_blocks: [], is_active: false, - ap_enabled: false, is_moderator: false, is_admin: false, mastofe_settings: nil, @@ -2217,8 +2215,7 @@ test "updates the counters normally on following/getting a follow when disabled" insert(:user, local: false, follower_address: "http://remote.org/users/masto_closed/followers", - following_address: "http://remote.org/users/masto_closed/following", - ap_enabled: true + following_address: "http://remote.org/users/masto_closed/following" ) assert other_user.following_count == 0 @@ -2239,8 +2236,7 @@ test "synchronizes the counters with the remote instance for the followed when e insert(:user, local: false, follower_address: "http://remote.org/users/masto_closed/followers", - following_address: "http://remote.org/users/masto_closed/following", - ap_enabled: true + following_address: "http://remote.org/users/masto_closed/following" ) assert other_user.following_count == 0 @@ -2261,8 +2257,7 @@ test "synchronizes the counters with the remote instance for the follower when e insert(:user, local: false, follower_address: "http://remote.org/users/masto_closed/followers", - following_address: 
"http://remote.org/users/masto_closed/following", - ap_enabled: true + following_address: "http://remote.org/users/masto_closed/following" ) assert other_user.following_count == 0 diff --git a/test/pleroma/web/activity_pub/activity_pub_controller_test.exs b/test/pleroma/web/activity_pub/activity_pub_controller_test.exs index 5b3697244..4cc7f93f5 100644 --- a/test/pleroma/web/activity_pub/activity_pub_controller_test.exs +++ b/test/pleroma/web/activity_pub/activity_pub_controller_test.exs @@ -579,7 +579,6 @@ test "it inserts an incoming activity into the database" <> user = insert(:user, ap_id: "https://mastodon.example.org/users/raymoo", - ap_enabled: true, local: false, last_refreshed_at: nil ) diff --git a/test/pleroma/web/activity_pub/activity_pub_test.exs b/test/pleroma/web/activity_pub/activity_pub_test.exs index c8f93f84d..7990b7ef5 100644 --- a/test/pleroma/web/activity_pub/activity_pub_test.exs +++ b/test/pleroma/web/activity_pub/activity_pub_test.exs @@ -178,7 +178,6 @@ test "it returns a user" do {:ok, user} = ActivityPub.make_user_from_ap_id(user_id) assert user.ap_id == user_id assert user.nickname == "admin@mastodon.example.org" - assert user.ap_enabled assert user.follower_address == "http://mastodon.example.org/users/admin/followers" end diff --git a/test/pleroma/web/activity_pub/publisher_test.exs b/test/pleroma/web/activity_pub/publisher_test.exs index 5896568b8..b90422d61 100644 --- a/test/pleroma/web/activity_pub/publisher_test.exs +++ b/test/pleroma/web/activity_pub/publisher_test.exs @@ -306,15 +306,13 @@ test "publish to url with with different ports" do follower = insert(:user, %{ local: false, - inbox: "https://domain.com/users/nick1/inbox", - ap_enabled: true + inbox: "https://domain.com/users/nick1/inbox" }) another_follower = insert(:user, %{ local: false, - inbox: "https://rejected.com/users/nick2/inbox", - ap_enabled: true + inbox: "https://rejected.com/users/nick2/inbox" }) actor = @@ -386,8 +384,7 @@ test "publish to url with with different ports" do follower = insert(:user, %{ local: false, - inbox: "https://domain.com/users/nick1/inbox", - ap_enabled: true + inbox: "https://domain.com/users/nick1/inbox" }) actor = @@ -425,8 +422,7 @@ test "publish to url with with different ports" do follower = insert(:user, %{ local: false, - inbox: "https://domain.com/users/nick1/inbox", - ap_enabled: true + inbox: "https://domain.com/users/nick1/inbox" }) actor = insert(:user, follower_address: follower.ap_id) @@ -461,15 +457,13 @@ test "publish to url with with different ports" do fetcher = insert(:user, local: false, - inbox: "https://domain.com/users/nick1/inbox", - ap_enabled: true + inbox: "https://domain.com/users/nick1/inbox" ) another_fetcher = insert(:user, local: false, - inbox: "https://domain2.com/users/nick1/inbox", - ap_enabled: true + inbox: "https://domain2.com/users/nick1/inbox" ) actor = insert(:user) diff --git a/test/pleroma/web/activity_pub/transmogrifier_test.exs b/test/pleroma/web/activity_pub/transmogrifier_test.exs index 1be69317c..94df25100 100644 --- a/test/pleroma/web/activity_pub/transmogrifier_test.exs +++ b/test/pleroma/web/activity_pub/transmogrifier_test.exs @@ -8,7 +8,6 @@ defmodule Pleroma.Web.ActivityPub.TransmogrifierTest do @moduletag :mocked alias Pleroma.Activity alias Pleroma.Object - alias Pleroma.Tests.ObanHelpers alias Pleroma.User alias Pleroma.Web.ActivityPub.Transmogrifier alias Pleroma.Web.ActivityPub.Utils @@ -348,69 +347,6 @@ test "Updates of Notes are handled" do end end - describe "user upgrade" do - test "it upgrades a 
user to activitypub" do - user = - insert(:user, %{ - nickname: "rye@niu.moe", - local: false, - ap_id: "https://niu.moe/users/rye", - follower_address: User.ap_followers(%User{nickname: "rye@niu.moe"}) - }) - - user_two = insert(:user) - Pleroma.FollowingRelationship.follow(user_two, user, :follow_accept) - - {:ok, activity} = CommonAPI.post(user, %{status: "test"}) - {:ok, unrelated_activity} = CommonAPI.post(user_two, %{status: "test"}) - assert "http://localhost:4001/users/rye@niu.moe/followers" in activity.recipients - - user = User.get_cached_by_id(user.id) - assert user.note_count == 1 - - {:ok, user} = Transmogrifier.upgrade_user_from_ap_id("https://niu.moe/users/rye") - ObanHelpers.perform_all() - - assert user.ap_enabled - assert user.note_count == 1 - assert user.follower_address == "https://niu.moe/users/rye/followers" - assert user.following_address == "https://niu.moe/users/rye/following" - - user = User.get_cached_by_id(user.id) - assert user.note_count == 1 - - activity = Activity.get_by_id(activity.id) - assert user.follower_address in activity.recipients - - assert %{ - "url" => [ - %{ - "href" => - "https://cdn.niu.moe/accounts/avatars/000/033/323/original/fd7f8ae0b3ffedc9.jpeg" - } - ] - } = user.avatar - - assert %{ - "url" => [ - %{ - "href" => - "https://cdn.niu.moe/accounts/headers/000/033/323/original/850b3448fa5fd477.png" - } - ] - } = user.banner - - refute "..." in activity.recipients - - unrelated_activity = Activity.get_by_id(unrelated_activity.id) - refute user.follower_address in unrelated_activity.recipients - - user_two = User.get_cached_by_id(user_two.id) - assert User.following?(user_two, user) - refute "..." in User.following(user_two) - end - end - describe "actor rewriting" do test "it fixes the actor URL property to be a proper URI" do data = %{ diff --git a/test/pleroma/web/common_api_test.exs b/test/pleroma/web/common_api_test.exs index 379cf85b8..b32334389 100644 --- a/test/pleroma/web/common_api_test.exs +++ b/test/pleroma/web/common_api_test.exs @@ -1129,7 +1129,7 @@ test "removes a pending follow for a local user" do test "cancels a pending follow for a remote user" do follower = insert(:user) - followed = insert(:user, is_locked: true, local: false, ap_enabled: true) + followed = insert(:user, is_locked: true, local: false) assert {:ok, follower, followed, %{id: _activity_id, data: %{"state" => "pending"}}} = CommonAPI.follow(follower, followed) diff --git a/test/pleroma/web/federator_test.exs b/test/pleroma/web/federator_test.exs index d3cc239cf..c9a13632a 100644 --- a/test/pleroma/web/federator_test.exs +++ b/test/pleroma/web/federator_test.exs @@ -79,16 +79,14 @@ test "it federates only to reachable instances via AP" do local: false, nickname: "nick1@domain.com", ap_id: "https://domain.com/users/nick1", - inbox: inbox1, - ap_enabled: true + inbox: inbox1 }) insert(:user, %{ local: false, nickname: "nick2@domain2.com", ap_id: "https://domain2.com/users/nick2", - inbox: inbox2, - ap_enabled: true + inbox: inbox2 }) dt = NaiveDateTime.utc_now() diff --git a/test/support/factory.ex b/test/support/factory.ex index 54e5f91b7..7797fc9b8 100644 --- a/test/support/factory.ex +++ b/test/support/factory.ex @@ -62,8 +62,7 @@ def user_factory(attrs \\ %{}) do last_digest_emailed_at: NaiveDateTime.utc_now(), last_refreshed_at: NaiveDateTime.utc_now(), notification_settings: %Pleroma.User.NotificationSetting{}, - multi_factor_authentication_settings: %Pleroma.MFA.Settings{}, - ap_enabled: true + multi_factor_authentication_settings: 
%Pleroma.MFA.Settings{}
     }

     urls =

From 0f4a7a185f85e3a2a2965f41949a7f1348b53e22 Mon Sep 17 00:00:00 2001
From: Oneric
Date: Fri, 13 Dec 2024 01:06:18 +0100
Subject: [PATCH 02/37] Drop ap_enabled indicator from atom feeds

---
 lib/pleroma/web/templates/feed/feed/_author.atom.eex     | 3 ---
 lib/pleroma/web/templates/feed/feed/_author.rss.eex      | 3 ---
 lib/pleroma/web/templates/feed/feed/_tag_author.atom.eex | 3 ---
 3 files changed, 9 deletions(-)

diff --git a/lib/pleroma/web/templates/feed/feed/_author.atom.eex b/lib/pleroma/web/templates/feed/feed/_author.atom.eex
index 25cbffada..309d37dc7 100644
--- a/lib/pleroma/web/templates/feed/feed/_author.atom.eex
+++ b/lib/pleroma/web/templates/feed/feed/_author.atom.eex
@@ -11,7 +11,4 @@
   <%= if User.banner_url(@user) do %>
   <% end %>
-  <%= if @user.local do %>
-    <ap_enabled>true</ap_enabled>
-  <% end %>
diff --git a/lib/pleroma/web/templates/feed/feed/_author.rss.eex b/lib/pleroma/web/templates/feed/feed/_author.rss.eex
index 526aeddcf..6eb82af67 100644
--- a/lib/pleroma/web/templates/feed/feed/_author.rss.eex
+++ b/lib/pleroma/web/templates/feed/feed/_author.rss.eex
@@ -11,7 +11,4 @@
   <%= if User.banner_url(@user) do %>
     <%= User.banner_url(@user) %>
   <% end %>
-  <%= if @user.local do %>
-    <ap_enabled>true</ap_enabled>
-  <% end %>
diff --git a/lib/pleroma/web/templates/feed/feed/_tag_author.atom.eex b/lib/pleroma/web/templates/feed/feed/_tag_author.atom.eex
index 997c4936e..35c686089 100644
--- a/lib/pleroma/web/templates/feed/feed/_tag_author.atom.eex
+++ b/lib/pleroma/web/templates/feed/feed/_tag_author.atom.eex
@@ -8,9 +8,6 @@
   <%= if User.banner_url(@actor) do %>
   <% end %>
-  <%= if @actor.local do %>
-    <ap_enabled>true</ap_enabled>
-  <% end %>
   <%= @actor.nickname %>
   <%= @actor.name %>

From 4859f386246f0206d60f1279f94c5d4b69ddd09f Mon Sep 17 00:00:00 2001
From: Oneric
Date: Fri, 13 Dec 2024 01:09:35 +0100
Subject: [PATCH 03/37] add_remove_validator: limit refetch rate to 1 per 5s

This matches the maximum_age used when processing Move activities

---
 .../activity_pub/object_validators/add_remove_validator.ex | 6 ++++--
 .../transmogrifier/add_remove_handling_test.exs            | 1 +
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/lib/pleroma/web/activity_pub/object_validators/add_remove_validator.ex b/lib/pleroma/web/activity_pub/object_validators/add_remove_validator.ex
index b2fa35831..c13f7d442 100644
--- a/lib/pleroma/web/activity_pub/object_validators/add_remove_validator.ex
+++ b/lib/pleroma/web/activity_pub/object_validators/add_remove_validator.ex
@@ -73,7 +73,9 @@ defp maybe_refetch_user(%User{featured_address: address} = user) when is_binary(
   end

   defp maybe_refetch_user(%User{ap_id: ap_id}) do
-    # Maybe it could use User.get_or_fetch_by_ap_id to avoid refreshing too often
-    User.fetch_by_ap_id(ap_id)
+    # If the user didn't expose a featured collection before,
+    # recheck now so we can verify perms for add/remove.
+    # But wait at least 5s to avoid rapid refetches in edge cases
+    User.get_or_fetch_by_ap_id(ap_id, maximum_age: 5)
   end
 end

diff --git a/test/pleroma/web/activity_pub/transmogrifier/add_remove_handling_test.exs b/test/pleroma/web/activity_pub/transmogrifier/add_remove_handling_test.exs
index c2b5f2cc8..f95d298e0 100644
--- a/test/pleroma/web/activity_pub/transmogrifier/add_remove_handling_test.exs
+++ b/test/pleroma/web/activity_pub/transmogrifier/add_remove_handling_test.exs
@@ -102,6 +102,7 @@ test "Add/Remove activities for remote users without featured address" do
     user =
       user
       |> Ecto.Changeset.change(featured_address: nil)
+      |> Ecto.Changeset.change(last_refreshed_at: ~N[2013-03-14 11:50:00.000000])
       |> Repo.update!()

     %{host: host} = URI.parse(user.ap_id)

From ead44c6671c55f55d565decee68434b699f4a99b Mon Sep 17 00:00:00 2001
From: Oneric
Date: Fri, 13 Dec 2024 01:10:26 +0100
Subject: [PATCH 04/37] federator: don't fetch the user for no reason

The return value is never used here; later stages which actually need it
fetch the user themselves, and it doesn't matter whether we wait for the
fetch here or later (if needed at all).

Moreover, this early fetch always fails if the user was already deleted
or never known to begin with, but we get something referencing it; e.g.
the very Delete action carrying out the user deletion.
This prevents processing of the Delete, but before that it will be
reattempted several times, each time attempting to fetch the
non-existing profile, wasting resources.

---
 lib/pleroma/web/federator.ex | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lib/pleroma/web/federator.ex b/lib/pleroma/web/federator.ex
index aaf89412f..adf8298da 100644
--- a/lib/pleroma/web/federator.ex
+++ b/lib/pleroma/web/federator.ex
@@ -91,8 +91,7 @@ def perform(:incoming_ap_doc, params) do
     # NOTE: we use the actor ID to do the containment, this is fine because an
     # actor shouldn't be acting on objects outside their own AP server.

-    with {_, {:ok, _user}} <- {:actor, User.get_or_fetch_by_ap_id(actor)},
-         nil <- Activity.normalize(params["id"]),
+    with nil <- Activity.normalize(params["id"]),
          {_, :ok} <-
            {:correct_origin?, Containment.contain_origin_from_id(actor, params)},
          {:ok, activity} <- Transmogrifier.handle_incoming(params) do

From 25d24cc5f65aa13b22ad15dcd486083e4c355cfe Mon Sep 17 00:00:00 2001
From: Oneric
Date: Sat, 14 Dec 2024 02:02:04 +0000
Subject: [PATCH 05/37] validators/add_remove: don't crash on failure to
 resolve reference

It allows for informed error handling and retry/discard job decisions
later on, which a future commit will add.
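
Sketch of the kind of caller-side decision this enables; the error shapes
match the diff below, while the surrounding case clause is illustrative only:

    case ObjectValidator.validate(data, meta) do
      # validated successfully, continue processing
      {:ok, object, meta} ->
        {:ok, object, meta}

      # unverifiable or malformed payload; retrying won't help
      {:error, {:validate, reason}} ->
        {:discard, reason}

      # possibly transient failure to resolve the actor; allow a retry
      {:error, :link_resolve_failed} = e ->
        e
    end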
--- .../object_validators/add_remove_validator.ex | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/lib/pleroma/web/activity_pub/object_validators/add_remove_validator.ex b/lib/pleroma/web/activity_pub/object_validators/add_remove_validator.ex index c13f7d442..47f1c8618 100644 --- a/lib/pleroma/web/activity_pub/object_validators/add_remove_validator.ex +++ b/lib/pleroma/web/activity_pub/object_validators/add_remove_validator.ex @@ -9,6 +9,7 @@ defmodule Pleroma.Web.ActivityPub.ObjectValidators.AddRemoveValidator do import Pleroma.Web.ActivityPub.ObjectValidators.CommonValidations require Pleroma.Constants + require Logger alias Pleroma.User @@ -27,14 +28,21 @@ defmodule Pleroma.Web.ActivityPub.ObjectValidators.AddRemoveValidator do end def cast_and_validate(data) do - {:ok, actor} = User.get_or_fetch_by_ap_id(data["actor"]) + with {_, {:ok, actor}} <- {:user, User.get_or_fetch_by_ap_id(data["actor"])}, + {_, {:ok, actor}} <- {:feataddr, maybe_refetch_user(actor)} do + data + |> maybe_fix_data_for_mastodon(actor) + |> cast_data() + |> validate_data(actor) + else + {:feataddr, _} -> + {:error, + {:validate, + "Actor doesn't provide featured collection address to verify against: #{data["id"]}"}} - {:ok, actor} = maybe_refetch_user(actor) - - data - |> maybe_fix_data_for_mastodon(actor) - |> cast_data() - |> validate_data(actor) + {:user, _} -> + {:error, :link_resolve_failed} + end end defp maybe_fix_data_for_mastodon(data, actor) do From ed4019e7a36b9049fadf96ca3ca9157ca29ba1a0 Mon Sep 17 00:00:00 2001 From: Oneric Date: Tue, 29 Oct 2024 00:18:15 +0100 Subject: [PATCH 06/37] workers: make custom filtering ahead of enqueue possible --- lib/pleroma/workers/worker_helper.ex | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/pleroma/workers/worker_helper.ex b/lib/pleroma/workers/worker_helper.ex index 6d27151de..ea9ce9d3b 100644 --- a/lib/pleroma/workers/worker_helper.ex +++ b/lib/pleroma/workers/worker_helper.ex @@ -38,7 +38,7 @@ defmacro __using__(opts) do alias Oban.Job - def enqueue(op, params, worker_args \\ []) do + defp do_enqueue(op, params, worker_args \\ []) do params = Map.merge(%{"op" => op}, params) queue_atom = String.to_atom(unquote(queue)) worker_args = worker_args ++ WorkerHelper.worker_args(queue_atom) @@ -48,11 +48,16 @@ def enqueue(op, params, worker_args \\ []) do |> Oban.insert() end + def enqueue(op, params, worker_args \\ []), + do: do_enqueue(op, params, worker_args) + @impl Oban.Worker def timeout(_job) do queue_atom = String.to_atom(unquote(queue)) Config.get([:workers, :timeout, queue_atom], :timer.minutes(1)) end + + defoverridable enqueue: 3 end end end From d283ac52c34ec0d75ac344dca60f689298cce9cd Mon Sep 17 00:00:00 2001 From: Oneric Date: Tue, 29 Oct 2024 01:41:27 +0100 Subject: [PATCH 07/37] Don't create noop SearchIndexingWorker jobs for passive index --- lib/pleroma/search/database_search.ex | 6 ---- lib/pleroma/search/search_backend.ex | 2 ++ lib/pleroma/workers/search_indexing_worker.ex | 29 ++++++++++++++----- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/lib/pleroma/search/database_search.ex b/lib/pleroma/search/database_search.ex index 3dc6245e8..0ded3cb17 100644 --- a/lib/pleroma/search/database_search.ex +++ b/lib/pleroma/search/database_search.ex @@ -40,12 +40,6 @@ def search(user, search_query, options \\ []) do end end - @impl true - def add_to_index(_activity), do: nil - - @impl true - def remove_from_index(_object), do: nil - def maybe_restrict_author(query, %User{} = 
author) do
    Activity.Queries.by_author(query, author)
  end

diff --git a/lib/pleroma/search/search_backend.ex b/lib/pleroma/search/search_backend.ex
index 56e3b7de5..f7a1b55b9 100644
--- a/lib/pleroma/search/search_backend.ex
+++ b/lib/pleroma/search/search_backend.ex
@@ -14,4 +14,6 @@ defmodule Pleroma.Search.SearchBackend do
   from index.
   """
   @callback remove_from_index(object :: Pleroma.Object.t()) :: {:ok, any()} | {:error, any()}
+
+  @optional_callbacks add_to_index: 1, remove_from_index: 1
 end
diff --git a/lib/pleroma/workers/search_indexing_worker.ex b/lib/pleroma/workers/search_indexing_worker.ex
index 518a44c0a..cd6bee1b5 100644
--- a/lib/pleroma/workers/search_indexing_worker.ex
+++ b/lib/pleroma/workers/search_indexing_worker.ex
@@ -1,23 +1,38 @@
 defmodule Pleroma.Workers.SearchIndexingWorker do
   use Pleroma.Workers.WorkerHelper, queue: "search_indexing"

-  @impl Oban.Worker
+  defp search_module(), do: Pleroma.Config.get!([Pleroma.Search, :module])
+
+  def enqueue("add_to_index", params, worker_args) do
+    if Kernel.function_exported?(search_module(), :add_to_index, 1) do
+      do_enqueue("add_to_index", params, worker_args)
+    else
+      # XXX: or {:ok, nil} to more closely match Oban.insert()'s {:ok, job}?
+      # or similar to unique conflict: %Oban.Job{conflict?: true} (but omitting all other fields...)
+      :ok
+    end
+  end
+
+  def enqueue("remove_from_index", params, worker_args) do
+    if Kernel.function_exported?(search_module(), :remove_from_index, 1) do
+      do_enqueue("remove_from_index", params, worker_args)
+    else
+      :ok
+    end
+  end
+
+  @impl Oban.Worker
   def perform(%Job{args: %{"op" => "add_to_index", "activity" => activity_id}}) do
     activity = Pleroma.Activity.get_by_id_with_object(activity_id)

-    search_module = Pleroma.Config.get([Pleroma.Search, :module])
-
-    search_module.add_to_index(activity)
+    search_module().add_to_index(activity)

     :ok
   end

   def perform(%Job{args: %{"op" => "remove_from_index", "object" => object_id}}) do
-    search_module = Pleroma.Config.get([Pleroma.Search, :module])
-
     # Fake the object so we can remove it from the index without having to keep it in the DB
-    search_module.remove_from_index(%Pleroma.Object{id: object_id})
+    search_module().remove_from_index(%Pleroma.Object{id: object_id})

     :ok
   end

From 92544e8f99658be53161caa8c4b5498ae2271d10 Mon Sep 17 00:00:00 2001
From: Oneric
Date: Tue, 29 Oct 2024 01:41:54 +0100
Subject: [PATCH 08/37] Don't enqueue a plethora of unnecessary NodeInfoFetcher
 jobs

There were two issues leading to needless effort:
Most importantly, the use of AP IDs as "source_url" meant multiple
simultaneous jobs got scheduled for the same instance even with the
default unique settings.
Also, jobs were scheduled unconditionally for each processed AP object,
meaning we incurred overhead from managing Oban jobs even if we knew it
wasn't necessary. By comparison, the single query to check whether an
update is needed should be cheaper overall.
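
For illustration, the base-URL reduction introduced below behaves like
this ("https://example.com" is just a stand-in domain):

    iex> URI.parse("https://example.com/users/alice") |> URI.merge("/") |> URI.to_string()
    "https://example.com/"

Combined with the unique option keyed on :op and :source_url, all jobs
for one instance then collapse into a single scheduled fetch.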
---
 lib/pleroma/instances/instance.ex            |  8 +++++++
 .../workers/nodeinfo_fetcher_worker.ex       | 23 ++++++++++++++++++-
 .../web/activity_pub/side_effects_test.exs   |  2 +-
 3 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/lib/pleroma/instances/instance.ex b/lib/pleroma/instances/instance.ex
index 5c70748b6..63362eb28 100644
--- a/lib/pleroma/instances/instance.ex
+++ b/lib/pleroma/instances/instance.ex
@@ -158,6 +158,14 @@ def needs_update(%Instance{metadata_updated_at: metadata_updated_at}) do
     NaiveDateTime.diff(now, metadata_updated_at) > 86_400
   end

+  def needs_update(%URI{host: host}) do
+    with %Instance{} = instance <- Repo.get_by(Instance, %{host: host}) do
+      needs_update(instance)
+    else
+      _ -> true
+    end
+  end
+
   def local do
     %Instance{
       host: Pleroma.Web.Endpoint.host(),

diff --git a/lib/pleroma/workers/nodeinfo_fetcher_worker.ex b/lib/pleroma/workers/nodeinfo_fetcher_worker.ex
index 27492e1e3..32907bac9 100644
--- a/lib/pleroma/workers/nodeinfo_fetcher_worker.ex
+++ b/lib/pleroma/workers/nodeinfo_fetcher_worker.ex
@@ -1,9 +1,30 @@
 defmodule Pleroma.Workers.NodeInfoFetcherWorker do
-  use Pleroma.Workers.WorkerHelper, queue: "nodeinfo_fetcher"
+  use Pleroma.Workers.WorkerHelper,
+    queue: "nodeinfo_fetcher",
+    unique: [
+      keys: [:op, :source_url],
+      # old jobs still get pruned after a short while
+      period: :infinity,
+      states: Oban.Job.states()
+    ]

   alias Oban.Job
   alias Pleroma.Instances.Instance

+  def enqueue(op, %{"source_url" => ap_id} = params, worker_args) do
+    # reduce to base url to avoid enqueueing unnecessary duplicates
+    domain =
+      ap_id
+      |> URI.parse()
+      |> URI.merge("/")
+
+    if Instance.needs_update(domain) do
+      do_enqueue(op, %{params | "source_url" => URI.to_string(domain)}, worker_args)
+    else
+      :ok
+    end
+  end
+
   @impl Oban.Worker
   def perform(%Job{
         args: %{"op" => "process", "source_url" => domain}

diff --git a/test/pleroma/web/activity_pub/side_effects_test.exs b/test/pleroma/web/activity_pub/side_effects_test.exs
index 28a591d3c..64a1fe6e6 100644
--- a/test/pleroma/web/activity_pub/side_effects_test.exs
+++ b/test/pleroma/web/activity_pub/side_effects_test.exs
@@ -46,7 +46,7 @@ test "it queues a fetch of instance information" do
       assert_enqueued(
         worker: Pleroma.Workers.NodeInfoFetcherWorker,
-        args: %{"op" => "process", "source_url" => "https://wowee.example.com/users/1"}
+        args: %{"op" => "process", "source_url" => "https://wowee.example.com/"}
       )
     end
   end

From 280652651c96954e383ccc60065b9b5eb4542a8c Mon Sep 17 00:00:00 2001
From: Oneric
Date: Sat, 23 Nov 2024 19:04:11 +0100
Subject: [PATCH 09/37] rich_media: don't reattempt parsing on rejected URLs

---
 lib/pleroma/web/rich_media/backfill.ex      |  4 +++
 lib/pleroma/web/rich_media/parser.ex        | 15 ++++++------
 test/pleroma/web/rich_media/parser_test.exs | 27 ++++++++++++++++-----
 3 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/lib/pleroma/web/rich_media/backfill.ex b/lib/pleroma/web/rich_media/backfill.ex
index 6b2373b01..8c54a0916 100644
--- a/lib/pleroma/web/rich_media/backfill.ex
+++ b/lib/pleroma/web/rich_media/backfill.ex
@@ -57,6 +57,10 @@ def run(%{"url" => url, "url_hash" => url_hash} = args) do
         Logger.debug("Rich media error for #{url}: :content_type is #{type}")
         negative_cache(url_hash, :timer.minutes(30))

+      {:error, {:url, reason}} ->
+        Logger.debug("Rich media error for #{url}: refusing URL #{inspect(reason)}")
+        negative_cache(url_hash, :timer.minutes(180))
+
       e ->
         Logger.debug("Rich media error for #{url}: #{inspect(e)}")
         {:error, e}

diff --git a/lib/pleroma/web/rich_media/parser.ex
b/lib/pleroma/web/rich_media/parser.ex index 7f6b5d388..7c5fed2bf 100644 --- a/lib/pleroma/web/rich_media/parser.ex +++ b/lib/pleroma/web/rich_media/parser.ex @@ -16,12 +16,13 @@ def parse(nil), do: nil @spec parse(String.t()) :: {:ok, map()} | {:error, any()} def parse(url) do with {_, true} <- {:config, @config_impl.get([:rich_media, :enabled])}, - :ok <- validate_page_url(url), + {_, :ok} <- {:url, validate_page_url(url)}, {:ok, data} <- parse_url(url) do data = Map.put(data, "url", url) {:ok, data} else {:config, _} -> {:error, :rich_media_disabled} + {:url, {:error, reason}} -> {:error, {:url, reason}} e -> e end end @@ -62,7 +63,7 @@ defp clean_parsed_data(data) do |> Map.new() end - @spec validate_page_url(URI.t() | binary()) :: :ok | :error + @spec validate_page_url(URI.t() | binary()) :: :ok | {:error, term()} defp validate_page_url(page_url) when is_binary(page_url) do validate_tld = @config_impl.get([Pleroma.Formatter, :validate_tld]) @@ -74,20 +75,20 @@ defp validate_page_url(page_url) when is_binary(page_url) do defp validate_page_url(%URI{host: host, scheme: "https"}) do cond do Linkify.Parser.ip?(host) -> - :error + {:error, :ip} host in @config_impl.get([:rich_media, :ignore_hosts], []) -> - :error + {:error, :ignore_hosts} get_tld(host) in @config_impl.get([:rich_media, :ignore_tld], []) -> - :error + {:error, :ignore_tld} true -> :ok end end - defp validate_page_url(_), do: :error + defp validate_page_url(_), do: {:error, "scheme mismatch"} defp parse_uri(true, url) do url @@ -95,7 +96,7 @@ defp parse_uri(true, url) do |> validate_page_url end - defp parse_uri(_, _), do: :error + defp parse_uri(_, _), do: {:error, "not an URL"} defp get_tld(host) do host diff --git a/test/pleroma/web/rich_media/parser_test.exs b/test/pleroma/web/rich_media/parser_test.exs index a5f2563a2..bf7864aa7 100644 --- a/test/pleroma/web/rich_media/parser_test.exs +++ b/test/pleroma/web/rich_media/parser_test.exs @@ -109,25 +109,40 @@ test "does a HEAD request to check if the body is html" do test "refuses to crawl incomplete URLs" do url = "example.com/ogp" - assert :error == Parser.parse(url) + assert {:error, {:url, "scheme mismatch"}} == Parser.parse(url) + end + + test "refuses to crawl plain HTTP and other scheme URL" do + [ + "http://example.com/ogp", + "ftp://example.org/dist/" + ] + |> Enum.each(fn url -> + res = Parser.parse(url) + + assert {:error, {:url, "scheme mismatch"}} == res or + {:error, {:url, "not an URL"}} == res + end) end test "refuses to crawl malformed URLs" do url = "example.com[]/ogp" - assert :error == Parser.parse(url) + assert {:error, {:url, "not an URL"}} == Parser.parse(url) end test "refuses to crawl URLs of private network from posts" do [ - "http://127.0.0.1:4000/notice/9kCP7VNyPJXFOXDrgO", + "https://127.0.0.1:4000/notice/9kCP7VNyPJXFOXDrgO", "https://10.111.10.1/notice/9kCP7V", "https://172.16.32.40/notice/9kCP7V", - "https://192.168.10.40/notice/9kCP7V", - "https://pleroma.local/notice/9kCP7V" + "https://192.168.10.40/notice/9kCP7V" ] |> Enum.each(fn url -> - assert :error == Parser.parse(url) + assert {:error, {:url, :ip}} == Parser.parse(url) end) + + url = "https://pleroma.local/notice/9kCP7V" + assert {:error, {:url, :ignore_tld}} == Parser.parse(url) end test "returns error when disabled" do From 041dedb86e3ae65b648ab6d3839aff72d3366e0b Mon Sep 17 00:00:00 2001 From: Oneric Date: Tue, 29 Oct 2024 01:59:41 +0100 Subject: [PATCH 10/37] Don't reattempt RichMediaBackfill by default Retrying seems unlikely to be helpful: - if it timed out, chances are the 
short delay before reattempting won't give the remote enough time to
  recover from its outage, and a longer delay makes the job pointless as
  users likely scrolled further already.
  (Likely this is already the case after the first 20s timeout)
- if the remote data is so borked we couldn't even parse it far enough
  for an "invalid metadata" error, chances are it will remain borked
  upon reattempt

---
 config/config.exs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/config.exs b/config/config.exs
index 03e373d8f..5a6169506 100644
--- a/config/config.exs
+++ b/config/config.exs
@@ -602,7 +602,7 @@
     federator_incoming: 5,
     federator_outgoing: 5,
     search_indexing: 2,
-    rich_media_backfill: 3
+    rich_media_backfill: 1
   ],
   timeout: [
     activity_expiration: :timer.seconds(5),

From f9724b58795a33817479508c3a6a6e637ebe6096 Mon Sep 17 00:00:00 2001
From: Oneric
Date: Tue, 29 Oct 2024 01:47:54 +0100
Subject: [PATCH 11/37] =?UTF-8?q?Don=E2=80=99t=20reattempt=20insertion=20o?=
 =?UTF-8?q?f=20already=20known=20objects?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Might happen if we receive e.g. a Like before the Note arrives in our
inbox and we thus already queried the Note ourselves.

---
 lib/pleroma/workers/receiver_worker.ex | 1 +
 test/pleroma/web/federator_test.exs    | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/pleroma/workers/receiver_worker.ex b/lib/pleroma/workers/receiver_worker.ex
index a663a63fe..453400e6b 100644
--- a/lib/pleroma/workers/receiver_worker.ex
+++ b/lib/pleroma/workers/receiver_worker.ex
@@ -14,6 +14,7 @@ def perform(%Job{args: %{"op" => "incoming_ap_doc", "params" => params}}) do
     else
       {:error, :origin_containment_failed} -> {:discard, :origin_containment_failed}
       {:error, {:reject, reason}} -> {:discard, reason}
+      {:error, :already_present} -> {:discard, :already_present}
       {:error, _} = e -> e
       e -> {:error, e}
     end

diff --git a/test/pleroma/web/federator_test.exs b/test/pleroma/web/federator_test.exs
index c9a13632a..30da35669 100644
--- a/test/pleroma/web/federator_test.exs
+++ b/test/pleroma/web/federator_test.exs
@@ -132,7 +132,7 @@ test "successfully processes incoming AP docs with correct origin" do
     assert {:ok, _activity} = ObanHelpers.perform(job)

     assert {:ok, job} = Federator.incoming_ap_doc(params)
-    assert {:error, :already_present} = ObanHelpers.perform(job)
+    assert {:discard, :already_present} = ObanHelpers.perform(job)
   end

   test "successfully normalises public scope descriptors" do

From 9f4d3a936f161fb80f84490c3482992b78d68de9 Mon Sep 17 00:00:00 2001
From: Oneric
Date: Sat, 23 Nov 2024 22:44:37 +0100
Subject: [PATCH 12/37] cosmetic/receiver_worker: reformat error cases

The next commit adds a multi-statement case and then mix format will
enforce this anyway

---
 lib/pleroma/workers/receiver_worker.ex | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/lib/pleroma/workers/receiver_worker.ex b/lib/pleroma/workers/receiver_worker.ex
index 453400e6b..b28442042 100644
--- a/lib/pleroma/workers/receiver_worker.ex
+++ b/lib/pleroma/workers/receiver_worker.ex
@@ -12,11 +12,20 @@ def perform(%Job{args: %{"op" => "incoming_ap_doc", "params" => params}}) do
     with {:ok, res} <- Federator.perform(:incoming_ap_doc, params) do
       {:ok, res}
     else
-      {:error, :origin_containment_failed} -> {:discard, :origin_containment_failed}
-      {:error, {:reject, reason}} -> {:discard, reason}
-      {:error, :already_present} -> {:discard, :already_present}
-      {:error, _} = e -> e
-      e -> {:error, e}
+      {:error, :origin_containment_failed} ->
+        {:discard, :origin_containment_failed}
+
+      {:error, {:reject, reason}} ->
+        {:discard, reason}
+
+      {:error, :already_present} ->
+        {:discard, :already_present}
+
+      {:error, _} = e ->
+        e
+
+      e ->
+        {:error, e}
     end
   end
 end

From be2c857845bc2fdfe10d4217136fea9cfed7f4e1 Mon Sep 17 00:00:00 2001
From: Oneric
Date: Sat, 23 Nov 2024 22:50:20 +0100
Subject: [PATCH 13/37] receiver_worker: don't reattempt invalid documents
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ideally we’d like to split this up more and count most invalid documents
as an error, but silently drop e.g. Deletes for unknown objects.
However, this is hard to extract from the changeset, and jobs canceled
with :discard don’t count as exceptions, and I’m not aware of an
idiomatic way to cancel further retries while retaining the exception
status.

Thus at least keep a log, but since superfluous "Delete"s seem kinda
frequent, don't log at error, only at info level.

---
 lib/pleroma/workers/receiver_worker.ex | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/lib/pleroma/workers/receiver_worker.ex b/lib/pleroma/workers/receiver_worker.ex
index b28442042..193b500f4 100644
--- a/lib/pleroma/workers/receiver_worker.ex
+++ b/lib/pleroma/workers/receiver_worker.ex
@@ -3,6 +3,8 @@
 # SPDX-License-Identifier: AGPL-3.0-only

 defmodule Pleroma.Workers.ReceiverWorker do
+  require Logger
+
   alias Pleroma.Web.Federator

   use Pleroma.Workers.WorkerHelper, queue: "federator_incoming"
@@ -21,6 +23,16 @@ def perform(%Job{args: %{"op" => "incoming_ap_doc", "params" => params}}) do
       {:error, :already_present} ->
         {:discard, :already_present}

+      # invalid data or e.g. deleting an object we don't know about anyway
+      {:error, {:error, {:validate, issue}}} ->
+        Logger.info("Received invalid AP document: #{inspect(issue)}")
+        {:discard, :invalid}
+
+      # rarer, but sometimes there’s an additional :error in front
+      {:error, {:error, {:error, {:validate, issue}}}} ->
+        Logger.info("Received invalid AP document: (3e) #{inspect(issue)}")
+        {:discard, :invalid}
+
       {:error, _} = e ->
         e

From d283ac52c34ec0d75ac344dca60f689298cce9cd Mon Sep 17 00:00:00 2001
From: Oneric
Date: Wed, 4 Dec 2024 01:59:02 +0100
Subject: [PATCH 14/37] receiver_worker: log unexpected errors

This can't handle process crash errors but I hope those get
a stacktrace logged by default

---
 lib/pleroma/workers/receiver_worker.ex | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/pleroma/workers/receiver_worker.ex b/lib/pleroma/workers/receiver_worker.ex
index 193b500f4..6a8f89a03 100644
--- a/lib/pleroma/workers/receiver_worker.ex
+++ b/lib/pleroma/workers/receiver_worker.ex
@@ -34,9 +34,11 @@ def perform(%Job{args: %{"op" => "incoming_ap_doc", "params" => params}}) do
         {:discard, :invalid}

       {:error, _} = e ->
+        Logger.error("Unexpected AP doc error: #{inspect(e)} from #{inspect(params)}")
         e

       e ->
+        Logger.error("Unexpected AP doc error: (raw) #{inspect(e)} from #{inspect(params)}")
         {:error, e}
     end
   end

From 8b5183cb7428a305344acfe797889d7fab67436a Mon Sep 17 00:00:00 2001
From: Oneric
Date: Sun, 24 Nov 2024 17:45:08 +0100
Subject: [PATCH 15/37] stats: fix stat spec

---
 lib/pleroma/stats.ex | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/pleroma/stats.ex b/lib/pleroma/stats.ex
index c47a0f9de..88740d155 100644
--- a/lib/pleroma/stats.ex
+++ b/lib/pleroma/stats.ex
@@ -39,7 +39,8 @@ def force_update do
   @spec get_stats() :: %{
           domain_count: non_neg_integer(),
           status_count: non_neg_integer(),
-          user_count: non_neg_integer()
+          user_count: non_neg_integer(),
+          remote_user_count: non_neg_integer()
         }
   def get_stats do
     %{stats: stats} = GenServer.call(__MODULE__, :get_state)

@@ -60,7 +61,8 @@ def get_peers do
         stats: %{
           domain_count: non_neg_integer(),
           status_count: non_neg_integer(),
-          user_count: non_neg_integer()
+          user_count: non_neg_integer(),
+          remote_user_count: non_neg_integer()
         }
       }
   def calculate_stat_data do

From 138b1aea2f4b80574156bf6cf7e108528086036d Mon Sep 17 00:00:00 2001
From: Oneric
Date: Sat, 23 Nov 2024 23:12:50 +0100
Subject: [PATCH 16/37] stats: use cheaper peers query
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This query is one of the top cost offenders during an instance’s
lifetime. For small instances it was shown to take up 30-50% of the
total database query time, while for bigger instances it still held a
spot in the top 3 — almost as or even more expensive overall than
timeline queries!

The good news is, there’s a cheaper way using the instance table:
no need to process each entry, no need to filter NULLs and no need to
dedupe. EXPLAIN estimates the cost of the old query as 13272.39 and the
cost of the new query as 395.74 for me; i.e. a 33-fold reduction.

Results can slightly differ. E.g. we might have an old user predating
the instance table’s existence with no interaction since, or no
instance table entry due to a failure to query nodeinfo. Conversely, we
might have an instance entry but all known users got deleted since.
However, this seems unproblematic in practice and well worth the perf
improvement.

Given the previous query didn’t exclude unreachable instances, neither
does the new query.

---
 lib/pleroma/stats.ex                                      | 8 ++++----
 .../mastodon_api/controllers/instance_controller_test.exs | 4 ++++
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/lib/pleroma/stats.ex b/lib/pleroma/stats.ex
index 88740d155..7bbe089f8 100644
--- a/lib/pleroma/stats.ex
+++ b/lib/pleroma/stats.ex
@@ -10,6 +10,7 @@ defmodule Pleroma.Stats do
   alias Pleroma.CounterCache
   alias Pleroma.Repo
   alias Pleroma.User
+  alias Pleroma.Instances.Instance

   @interval :timer.seconds(300)

@@ -66,14 +67,13 @@ def get_peers do
         }
       }
   def calculate_stat_data do
+    # instances table has a unique constraint on the host column
     peers =
       from(
-        u in User,
-        select: fragment("distinct split_part(?, '@', 2)", u.nickname),
-        where: u.local != ^true
+        i in Instance,
+        select: i.host
       )
       |> Repo.all()
-      |> Enum.filter(& &1)

     domain_count = Enum.count(peers)

diff --git a/test/pleroma/web/mastodon_api/controllers/instance_controller_test.exs b/test/pleroma/web/mastodon_api/controllers/instance_controller_test.exs
index 7b400d1ee..39e9629eb 100644
--- a/test/pleroma/web/mastodon_api/controllers/instance_controller_test.exs
+++ b/test/pleroma/web/mastodon_api/controllers/instance_controller_test.exs
@@ -61,7 +61,9 @@ test "get instance stats", %{conn: conn} do
     {:ok, _user2} = User.set_activation(user2, false)

     insert(:user, %{local: false, nickname: "u@peer1.com"})
+    insert(:instance, %{domain: "peer1.com"})
     insert(:user, %{local: false, nickname: "u@peer2.com"})
+    insert(:instance, %{domain: "peer2.com"})

     {:ok, _} = Pleroma.Web.CommonAPI.post(user, %{status: "cofe"})

@@ -81,7 +83,9 @@ test "get peers", %{conn: conn} do
     insert(:user, %{local: false, nickname: "u@peer1.com"})
+    insert(:instance, %{domain: "peer1.com"})
     insert(:user, %{local: false, nickname: "u@peer2.com"})
+    insert(:instance, %{domain: "peer2.com"})

     Pleroma.Stats.force_update()

From 8e5defe6ca1421e41e543f507a9a65c0617c7571 Mon Sep 17 00:00:00 2001
From: Oneric
Date: Wed, 11 Dec 2024 03:03:14 +0100
Subject: [PATCH 17/37] stats: estimate remote user count

This value is currently only used by Prometheus metrics, but (after
optimising the peer query in the preceding commit) it was the most
costly part of instance stats.

---
 CHANGELOG.md                                   |  2 +
 lib/pleroma/stats.ex                           | 20 +++++-----
 ...00_remote_user_count_estimate_function.exs  | 38 +++++++++++++++++++
 3 files changed, 49 insertions(+), 11 deletions(-)
 create mode 100644 priv/repo/migrations/20241211000000_remote_user_count_estimate_function.exs

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b0448b0d8..ced89d2b3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

 ## Changed
 - Dropped obsolete `ap_enabled` indicator from user table and associated buggy logic
+- The remote user count in prometheus metrics is now an estimate instead of an exact number
+  since the latter proved unreasonably costly to obtain for a merely nice-to-have statistic

 ## 2025.01.01

diff --git a/lib/pleroma/stats.ex b/lib/pleroma/stats.ex
index 7bbe089f8..f33c378dd 100644
--- a/lib/pleroma/stats.ex
+++ b/lib/pleroma/stats.ex
@@ -79,24 +79,22 @@ def calculate_stat_data do
     status_count = Repo.aggregate(User.Query.build(%{local: true}), :sum, :note_count)

-    users_query =
+    # there are few enough local users for postgres to use an index scan
+    # (also here an exact count is a bit more important)
+    user_count =
       from(u in User,
         where: u.is_active == true,
         where: u.local == true,
         where: not is_nil(u.nickname),
         where: not u.invisible
       )
+      |> Repo.aggregate(:count, :id)

-    remote_users_query =
-      from(u in User,
-        where: u.is_active == true,
-        where: u.local == false,
-        where: not is_nil(u.nickname),
-        where: not u.invisible
-      )
-
-    user_count = Repo.aggregate(users_query, :count, :id)
-    remote_user_count = Repo.aggregate(remote_users_query, :count, :id)
+    # but the typically numerous remote users would lead to a full table scan
+    # (ecto currently doesn't allow building queries without explicit table)
+    %{rows: [[remote_user_count]]} =
+      "SELECT estimate_remote_user_count();"
+      |> Pleroma.Repo.query!()

     %{
       peers: peers,

diff --git a/priv/repo/migrations/20241211000000_remote_user_count_estimate_function.exs b/priv/repo/migrations/20241211000000_remote_user_count_estimate_function.exs
new file mode 100644
index 000000000..010f068a5
--- /dev/null
+++ b/priv/repo/migrations/20241211000000_remote_user_count_estimate_function.exs
@@ -0,0 +1,38 @@
+# Akkoma: Magically expressive social media
+# Copyright © 2024 Akkoma Authors
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Repo.Migrations.RemoteUserCountEstimateFunction do
+  use Ecto.Migration
+
+  @function_name "estimate_remote_user_count"
+
+  def up() do
+    # yep, this EXPLAIN (ab)use is blessed by the PostgreSQL wiki:
+    # https://wiki.postgresql.org/wiki/Count_estimate
+    """
+    CREATE OR REPLACE FUNCTION #{@function_name}()
+    RETURNS integer
+    LANGUAGE plpgsql AS $$
+    DECLARE plan jsonb;
+    BEGIN
+      EXECUTE '
+          EXPLAIN (FORMAT JSON)
+          SELECT *
+          FROM public.users
+          WHERE local = false AND
+                is_active = true AND
+                invisible = false AND
+                nickname IS NOT NULL;
+      ' INTO plan;
+      RETURN plan->0->'Plan'->'Plan Rows';
+    END;
+    $$;
+    """
+    |> execute()
+  end
+
+  def down() do
+    execute("DROP FUNCTION IF EXISTS #{@function_name}()")
+  end
+end

From
0ba5c3649d4b8e4f3707e93ff22a2f0f59464bf6 Mon Sep 17 00:00:00 2001
From: Oneric
Date: Sat, 14 Dec 2024 15:40:52 +0100
Subject: [PATCH 18/37] federator: don't nest {:error, _} tuples

Nested tuples make decisions based on error sources harder since all
possible nesting levels need to be checked. As shown by the return
values handled in the receiver worker, something else still nests
those, but this is a first step.

---
 lib/pleroma/web/federator.ex           | 6 +++++-
 lib/pleroma/workers/receiver_worker.ex | 6 +++---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/lib/pleroma/web/federator.ex b/lib/pleroma/web/federator.ex
index adf8298da..ce1dd8fa8 100644
--- a/lib/pleroma/web/federator.ex
+++ b/lib/pleroma/web/federator.ex
@@ -117,7 +117,11 @@ def perform(:incoming_ap_doc, params) do
       e ->
         # Just drop those for now
         Logger.debug(fn -> "Unhandled activity\n" <> Jason.encode!(params, pretty: true) end)
-        {:error, e}
+
+        case e do
+          {:error, _} -> e
+          _ -> {:error, e}
+        end
     end
   end
 end

diff --git a/lib/pleroma/workers/receiver_worker.ex b/lib/pleroma/workers/receiver_worker.ex
index 6a8f89a03..3eccba812 100644
--- a/lib/pleroma/workers/receiver_worker.ex
+++ b/lib/pleroma/workers/receiver_worker.ex
@@ -24,13 +24,13 @@ def perform(%Job{args: %{"op" => "incoming_ap_doc", "params" => params}}) do
         {:discard, :already_present}

       # invalid data or e.g. deleting an object we don't know about anyway
-      {:error, {:error, {:validate, issue}}} ->
+      {:error, {:validate, issue}} ->
         Logger.info("Received invalid AP document: #{inspect(issue)}")
         {:discard, :invalid}

       # rarer, but sometimes there’s an additional :error in front
-      {:error, {:error, {:error, {:validate, issue}}}} ->
-        Logger.info("Received invalid AP document: (3e) #{inspect(issue)}")
+      {:error, {:error, {:validate, issue}}} ->
+        Logger.info("Received invalid AP document: (2e) #{inspect(issue)}")
         {:discard, :invalid}

       {:error, _} = e ->

From 0cd4040db684b8d85b7a2e53417adf1fe536c31b Mon Sep 17 00:00:00 2001
From: Oneric
Date: Wed, 4 Dec 2024 02:09:49 +0100
Subject: [PATCH 19/37] Error out earlier on missing mandatory reference

This is the only user of fetch_actor_and_object which previously just
always pretended to be successful. For all the activity types handled
here, we absolutely need the referenced object to be able to process it
(other than for Announce; whether processing those activity types for
unknown remote objects is desirable in the first place is up for debate).
All other users of the similar fetch_actor already properly check success.

Note, this currently lumps all resolve failure reasons together, so even
e.g. boosts of MRF-rejected posts will still exhaust all retries. The
following commit improves on this.
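
For reference, this is the removed implementation (the exact code deleted
in the diff below), annotated:

    def fetch_actor_and_object(object) do
      # both return values were discarded ...
      fetch_actor(object)
      Object.normalize(object["object"], fetch: true)
      # ... and success was reported unconditionally
      :ok
    end

so a `with :ok <- ObjectValidator.fetch_actor_and_object(data)` in the
Transmogrifier could never take its else branch, even if both lookups failed.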
--- lib/pleroma/web/activity_pub/object_validator.ex | 9 ++++++--- lib/pleroma/web/activity_pub/transmogrifier.ex | 5 ++++- lib/pleroma/workers/receiver_worker.ex | 7 +++++++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/lib/pleroma/web/activity_pub/object_validator.ex b/lib/pleroma/web/activity_pub/object_validator.ex index cb0cc9ed7..e44f2fdee 100644 --- a/lib/pleroma/web/activity_pub/object_validator.ex +++ b/lib/pleroma/web/activity_pub/object_validator.ex @@ -253,9 +253,12 @@ def fetch_actor(object) do end def fetch_actor_and_object(object) do - fetch_actor(object) - Object.normalize(object["object"], fetch: true) - :ok + with {:ok, %User{}} <- fetch_actor(object), + %Object{} <- Object.normalize(object["object"], fetch: true) do + :ok + else + _ -> :error + end end defp for_each_history_item( diff --git a/lib/pleroma/web/activity_pub/transmogrifier.ex b/lib/pleroma/web/activity_pub/transmogrifier.ex index 251ba6540..02253a2c8 100644 --- a/lib/pleroma/web/activity_pub/transmogrifier.ex +++ b/lib/pleroma/web/activity_pub/transmogrifier.ex @@ -519,10 +519,13 @@ defp handle_incoming_normalised( defp handle_incoming_normalised(%{"type" => type} = data, _options) when type in ~w{Like EmojiReact Announce Add Remove} do - with :ok <- ObjectValidator.fetch_actor_and_object(data), + with {_, :ok} <- {:link, ObjectValidator.fetch_actor_and_object(data)}, {:ok, activity, _meta} <- Pipeline.common_pipeline(data, local: false) do {:ok, activity} else + {:link, _} -> + {:error, :link_resolve_failed} + e -> {:error, e} end diff --git a/lib/pleroma/workers/receiver_worker.ex b/lib/pleroma/workers/receiver_worker.ex index 3eccba812..4cde93503 100644 --- a/lib/pleroma/workers/receiver_worker.ex +++ b/lib/pleroma/workers/receiver_worker.ex @@ -33,6 +33,13 @@ def perform(%Job{args: %{"op" => "incoming_ap_doc", "params" => params}}) do Logger.info("Received invalid AP document: (2e) #{inspect(issue)}") {:discard, :invalid} + # failed to resolve a necessary referenced remote AP object; + # might be temporary server/network trouble thus reattempt + {:error, :link_resolve_failed} = e -> + # TODO: lower to debug for PR! + Logger.info("Failed to resolve AP link; may retry: #{inspect(params)}") + e + {:error, _} = e -> Logger.error("Unexpected AP doc error: #{inspect(e)} from #{inspect(params)}") e From 2c756005320ac861dae090eafc1f54b18f8dc99e Mon Sep 17 00:00:00 2001 From: Oneric Date: Sat, 14 Dec 2024 19:33:42 +0100 Subject: [PATCH 20/37] federation/incoming: improve link_resolve retry decision MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To facilitate this ObjectValidator.fetch_actor_and_object is adapted to return an informative error. Otherwise we’d be unable to make an informed decision on retrying or not later. There’s no point in retrying to fetch MRF-blocked stuff or private posts for example. 
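
As a rough summary, the receiver worker then maps these error reasons to
job outcomes along the following lines (schematic only; see the diff below
for the actual clauses):

    {:error, :ignore}               -> {:discard, :ignore}   # deleted profile or inaccessible private post
    {:error, {:reject, reason}}     -> {:discard, reason}    # e.g. blocked by MRF; retrying is pointless
    {:error, {:validate, issue}}    -> {:discard, :invalid}  # malformed or unverifiable document
    {:error, :link_resolve_failed}  -> returned as-is        # possibly transient; Oban retries the job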
--- lib/pleroma/user.ex | 4 ++++ .../web/activity_pub/object_validator.ex | 21 +++++++++++++++++-- .../web/activity_pub/transmogrifier.ex | 9 ++++++++ lib/pleroma/workers/receiver_worker.ex | 4 +++- 4 files changed, 35 insertions(+), 3 deletions(-) diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index 3042d86f2..2e09a89b6 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -1999,6 +1999,10 @@ def get_or_fetch_by_ap_id(ap_id, options \\ []) do {%User{} = user, _} -> {:ok, user} + {_, {:error, {:reject, :mrf}}} -> + Logger.debug("Rejected to fetch user due to MRF: #{ap_id}") + {:error, {:reject, :mrf}} + e -> Logger.error("Could not fetch user #{ap_id}, #{inspect(e)}") {:error, :not_found} diff --git a/lib/pleroma/web/activity_pub/object_validator.ex b/lib/pleroma/web/activity_pub/object_validator.ex index e44f2fdee..93980f35b 100644 --- a/lib/pleroma/web/activity_pub/object_validator.ex +++ b/lib/pleroma/web/activity_pub/object_validator.ex @@ -15,6 +15,7 @@ defmodule Pleroma.Web.ActivityPub.ObjectValidator do alias Pleroma.EctoType.ActivityPub.ObjectValidators alias Pleroma.Object alias Pleroma.Object.Containment + alias Pleroma.Object.Fetcher alias Pleroma.User alias Pleroma.Web.ActivityPub.ObjectValidators.AcceptRejectValidator alias Pleroma.Web.ActivityPub.ObjectValidators.AddRemoveValidator @@ -253,11 +254,27 @@ def fetch_actor(object) do end def fetch_actor_and_object(object) do + # Fetcher.fetch_object_from_id already first does a local db lookup with {:ok, %User{}} <- fetch_actor(object), - %Object{} <- Object.normalize(object["object"], fetch: true) do + {:ap_id, id} when is_binary(id) <- + {:ap_id, Pleroma.Web.ActivityPub.Utils.get_ap_id(object["object"])}, + {:ok, %Object{}} <- Fetcher.fetch_object_from_id(id) do :ok else - _ -> :error + {:ap_id, id} -> + {:error, {:validate, "Invalid AP id: #{inspect(id)}"}} + + # if actor: late post from a previously unknown, deleted profile + # if object: private post we're not allowed to access + # (other HTTP replies might just indicate a temporary network failure though!) + {:error, e} when e in [:not_found, :forbidden] -> + {:error, :ignore} + + {:error, _} = e -> + e + + e -> + {:error, e} end end diff --git a/lib/pleroma/web/activity_pub/transmogrifier.ex b/lib/pleroma/web/activity_pub/transmogrifier.ex index 02253a2c8..51912c6a8 100644 --- a/lib/pleroma/web/activity_pub/transmogrifier.ex +++ b/lib/pleroma/web/activity_pub/transmogrifier.ex @@ -523,6 +523,15 @@ defp handle_incoming_normalised(%{"type" => type} = data, _options) {:ok, activity, _meta} <- Pipeline.common_pipeline(data, local: false) do {:ok, activity} else + {:link, {:error, :ignore}} -> + {:error, :ignore} + + {:link, {:error, {:validate, _}} = e} -> + e + + {:link, {:error, {:reject, _}} = e} -> + e + {:link, _} -> {:error, :link_resolve_failed} diff --git a/lib/pleroma/workers/receiver_worker.ex b/lib/pleroma/workers/receiver_worker.ex index 4cde93503..16241a75f 100644 --- a/lib/pleroma/workers/receiver_worker.ex +++ b/lib/pleroma/workers/receiver_worker.ex @@ -23,6 +23,9 @@ def perform(%Job{args: %{"op" => "incoming_ap_doc", "params" => params}}) do {:error, :already_present} -> {:discard, :already_present} + {:error, :ignore} -> + {:discard, :ignore} + # invalid data or e.g. 
deleting an object we don't know about anyway {:error, {:validate, issue}} -> Logger.info("Received invalid AP document: #{inspect(issue)}") {:discard, :invalid} @@ -36,7 +39,6 @@ def perform(%Job{args: %{"op" => "incoming_ap_doc", "params" => params}}) do # failed to resolve a necessary referenced remote AP object; # might be temporary server/network trouble thus reattempt {:error, :link_resolve_failed} = e -> - # TODO: lower to debug for PR! Logger.info("Failed to resolve AP link; may retry: #{inspect(params)}") e From 05bbdbf388766a98898cf13efdaca2bbe0c1dada Mon Sep 17 00:00:00 2001 From: Oneric Date: Wed, 11 Dec 2024 03:41:33 +0100 Subject: [PATCH 21/37] nodeinfo: lower log level of regular actions to debug --- lib/pleroma/instances/instance.ex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/pleroma/instances/instance.ex b/lib/pleroma/instances/instance.ex index 63362eb28..105556900 100644 --- a/lib/pleroma/instances/instance.ex +++ b/lib/pleroma/instances/instance.ex @@ -188,7 +188,7 @@ def update_metadata(%URI{host: host} = uri) do defp do_update_metadata(%URI{host: host} = uri, existing_record) do if existing_record do if needs_update(existing_record) do - Logger.info("Updating metadata for #{host}") + Logger.debug("Updating metadata for #{host}") favicon = scrape_favicon(uri) nodeinfo = scrape_nodeinfo(uri) @@ -207,7 +207,7 @@ defp do_update_metadata(%URI{host: host} = uri, existing_record) do favicon = scrape_favicon(uri) nodeinfo = scrape_nodeinfo(uri) - Logger.info("Creating metadata for #{host}") + Logger.debug("Creating metadata for #{host}") %Instance{} |> changeset(%{ From bcf3e101f6ebc19aa34adbc093a362571ffc48dc Mon Sep 17 00:00:00 2001 From: Oneric Date: Wed, 11 Dec 2024 03:42:00 +0100 Subject: [PATCH 22/37] rich_media: lower log level of update --- lib/pleroma/web/rich_media/backfill.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pleroma/web/rich_media/backfill.ex b/lib/pleroma/web/rich_media/backfill.ex index 8c54a0916..16730ec5a 100644 --- a/lib/pleroma/web/rich_media/backfill.ex +++ b/lib/pleroma/web/rich_media/backfill.ex @@ -86,7 +86,7 @@ defp maybe_schedule_expiration(url, fields) do end defp stream_update(%{"activity_id" => activity_id}) do - Logger.info("Rich media backfill: streaming update for activity #{activity_id}") + Logger.debug("Rich media backfill: streaming update for activity #{activity_id}") Pleroma.Activity.get_by_id(activity_id) |> Pleroma.Activity.normalize() From 09736431e0736664f95a4e2bba7cb63889ffcb6a Mon Sep 17 00:00:00 2001 From: Oneric Date: Sat, 14 Dec 2024 21:03:16 +0100 Subject: [PATCH 23/37] Don't spam logs about deleted users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User.get_or_fetch_by_(apid|nickname) are the only external users of fetch_and_prepare_user_from_ap_id, thus there’s no point in duplicating logging, especially not at error level. Currently (duplicated) _not_found errors for users make up the bulk of my logs and are created almost every second.
Deleted users are a common occurrence and not worth logging outside of debug --- lib/pleroma/user.ex | 10 +++++++++- lib/pleroma/web/activity_pub/activity_pub.ex | 11 ++++------- .../activity_pub/mrf/anti_link_spam_policy_test.exs | 8 ++++---- test/pleroma/web/activity_pub/relay_test.exs | 4 ++-- .../web/twitter_api/remote_follow_controller_test.exs | 2 +- 5 files changed, 20 insertions(+), 15 deletions(-) diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index 2e09a89b6..2f4ac1b72 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -2003,8 +2003,16 @@ def get_or_fetch_by_ap_id(ap_id, options \\ []) do Logger.debug("Rejected to fetch user due to MRF: #{ap_id}") {:error, {:reject, :mrf}} - e -> + {_, {:error, :not_found}} -> + Logger.debug("User doesn't exist (anymore): #{ap_id}") + {:error, :not_found} + + {_, {:error, e}} -> Logger.error("Could not fetch user #{ap_id}, #{inspect(e)}") + {:error, e} + + e -> + Logger.error("Unexpected error condition while fetching user #{ap_id}, #{inspect(e)}") {:error, :not_found} end end diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex index 494a27421..224767b80 100644 --- a/lib/pleroma/web/activity_pub/activity_pub.ex +++ b/lib/pleroma/web/activity_pub/activity_pub.ex @@ -1750,19 +1750,16 @@ defp fetch_and_prepare_user_from_ap_id(ap_id, additional) do else # If this has been deleted, only log a debug and not an error {:error, {"Object has been deleted", _, _} = e} -> - Logger.debug("Could not decode user at fetch #{ap_id}, #{inspect(e)}") - {:error, e} + Logger.debug("User was explicitly deleted #{ap_id}, #{inspect(e)}") + {:error, :not_found} - {:reject, reason} = e -> - Logger.debug("Rejected user #{ap_id}: #{inspect(reason)}") + {:reject, _reason} = e -> {:error, e} {:valid, reason} -> - Logger.debug("Data is not a valid user #{ap_id}: #{inspect(reason)}") - {:error, "Not a user"} + {:error, {:validate, reason}} {:error, e} -> - Logger.error("Could not decode user at fetch #{ap_id}, #{inspect(e)}") {:error, e} end end diff --git a/test/pleroma/web/activity_pub/mrf/anti_link_spam_policy_test.exs b/test/pleroma/web/activity_pub/mrf/anti_link_spam_policy_test.exs index 6182e9717..291108da9 100644 --- a/test/pleroma/web/activity_pub/mrf/anti_link_spam_policy_test.exs +++ b/test/pleroma/web/activity_pub/mrf/anti_link_spam_policy_test.exs @@ -241,11 +241,11 @@ test "it rejects posts without links" do assert capture_log(fn -> {:reject, _} = AntiLinkSpamPolicy.filter(message) - end) =~ "[error] Could not decode user at fetch http://invalid.actor" + end) =~ "[error] Could not fetch user http://invalid.actor," assert capture_log(fn -> {:reject, _} = AntiLinkSpamPolicy.filter(update_message) - end) =~ "[error] Could not decode user at fetch http://invalid.actor" + end) =~ "[error] Could not fetch user http://invalid.actor," end test "it rejects posts with links" do @@ -259,11 +259,11 @@ test "it rejects posts with links" do assert capture_log(fn -> {:reject, _} = AntiLinkSpamPolicy.filter(message) - end) =~ "[error] Could not decode user at fetch http://invalid.actor" + end) =~ "[error] Could not fetch user http://invalid.actor," assert capture_log(fn -> {:reject, _} = AntiLinkSpamPolicy.filter(update_message) - end) =~ "[error] Could not decode user at fetch http://invalid.actor" + end) =~ "[error] Could not fetch user http://invalid.actor," end end diff --git a/test/pleroma/web/activity_pub/relay_test.exs b/test/pleroma/web/activity_pub/relay_test.exs index 99cc2071e..b1c927b49 100644 ---
a/test/pleroma/web/activity_pub/relay_test.exs +++ b/test/pleroma/web/activity_pub/relay_test.exs @@ -29,7 +29,7 @@ test "relay actor is invisible" do test "returns errors when user not found" do assert capture_log(fn -> {:error, _} = Relay.follow("test-ap-id") - end) =~ "Could not decode user at fetch" + end) =~ "Could not fetch user test-ap-id," end test "returns activity" do @@ -48,7 +48,7 @@ test "returns activity" do test "returns errors when user not found" do assert capture_log(fn -> {:error, _} = Relay.unfollow("test-ap-id") - end) =~ "Could not decode user at fetch" + end) =~ "Could not fetch user test-ap-id," end test "returns activity" do diff --git a/test/pleroma/web/twitter_api/remote_follow_controller_test.exs b/test/pleroma/web/twitter_api/remote_follow_controller_test.exs index 5a94e4396..66e19b5ef 100644 --- a/test/pleroma/web/twitter_api/remote_follow_controller_test.exs +++ b/test/pleroma/web/twitter_api/remote_follow_controller_test.exs @@ -132,7 +132,7 @@ test "show follow page with error when user can not be fetched by `acct` link", |> html_response(200) assert response =~ "Error fetching user" - end) =~ ":not_found" + end) =~ "User doesn't exist (anymore): https://mastodon.social/users/not_found" end end From caa4fbe326a4c66df51901fc4aee61df0db1211a Mon Sep 17 00:00:00 2001 From: Oneric Date: Tue, 17 Dec 2024 00:14:27 +0100 Subject: [PATCH 24/37] user: avoid database work on superfluous pin The only thing this does is changing the updated_at field of the user. Afaict this came to be because prior to pins federating this was split into two functions, one of which created a changeset, the other applying a given changeset. When this was merged the bits were just copied into place. --- lib/pleroma/user.ex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index 2f4ac1b72..697597e1b 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -2581,10 +2581,10 @@ def add_pinned_object_id(%User{} = user, object_id) do [pinned_objects: "You have already pinned the maximum number of statuses"] end end) + |> update_and_set_cache() else - change(user) + {:ok, user} end - |> update_and_set_cache() end @spec remove_pinned_object_id(User.t(), String.t()) :: {:ok, t()} | {:error, term()} From b0387dee14b027a38b1d98311fc14ead60ff2900 Mon Sep 17 00:00:00 2001 From: Oneric Date: Wed, 18 Dec 2024 00:55:48 +0100 Subject: [PATCH 25/37] Gracefully ignore Undo activities referring to unknown objects --- .../web/activity_pub/transmogrifier.ex | 14 ++++++++++++++ .../web/activity_pub/transmogrifier_test.exs | 19 +++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/lib/pleroma/web/activity_pub/transmogrifier.ex b/lib/pleroma/web/activity_pub/transmogrifier.ex index 51912c6a8..e75542eb4 100644 --- a/lib/pleroma/web/activity_pub/transmogrifier.ex +++ b/lib/pleroma/web/activity_pub/transmogrifier.ex @@ -604,6 +604,20 @@ defp handle_incoming_normalised( when type in ["Like", "EmojiReact", "Announce", "Block"] do with {:ok, activity, _} <- Pipeline.common_pipeline(data, local: false) do {:ok, activity} + else + {:error, {:validate, {:error, %Ecto.Changeset{errors: errors}}}} = e -> + # If we never saw the activity being undone, no need to do anything. 
+ # Inspecting the validation error content is a bit awkward, but looking up the Activity + # ahead of time here would be too costly since Activity queries are not cached + # and there's no way atm to pass the retrieved result along + if errors[:object] == {"can't find object", []} do + {:error, :ignore} + else + e + end + + e -> + e end end diff --git a/test/pleroma/web/activity_pub/transmogrifier_test.exs b/test/pleroma/web/activity_pub/transmogrifier_test.exs index 94df25100..be07a0fe4 100644 --- a/test/pleroma/web/activity_pub/transmogrifier_test.exs +++ b/test/pleroma/web/activity_pub/transmogrifier_test.exs @@ -52,6 +52,25 @@ test "it works for incoming unfollows with an existing follow" do refute User.following?(User.get_cached_by_ap_id(data["actor"]), user) end + test "it ignores Undo activities for unknown objects" do + undo_data = %{ + "id" => "https://remote.com/undo", + "type" => "Undo", + "actor" => "https://remote.com/users/unknown", + "object" => %{ + "id" => "https://remote.com/undone_activity/unknown", + "type" => "Like" + } + } + + assert {:error, :ignore} == Transmogrifier.handle_incoming(undo_data) + + user = insert(:user, local: false, ap_id: "https://remote.com/users/known") + undo_data = %{undo_data | "actor" => user.ap_id} + + assert {:error, :ignore} == Transmogrifier.handle_incoming(undo_data) + end + test "it accepts Flag activities" do user = insert(:user) other_user = insert(:user) From 7ad5f8d3c0a02d4b2e93d605403ac6d8558244ec Mon Sep 17 00:00:00 2001 From: Oneric Date: Wed, 18 Dec 2024 01:07:31 +0100 Subject: [PATCH 26/37] object_validators: only query relevant table for object Most of them actually only accept either activities or a non-activity object later; querying both is then a waste of resources and may create false positives.
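The gating added below leans on `&&`/`||` returning the first decisive operand, so a table outside the allowed categories is never queried at all. An illustrative standalone sketch (strings stand in for the real table lookups):

    lookup = fn allowed ->
      (:object in allowed && "hit objects table") ||
        (:activity in allowed && "hit activities table") ||
        nil
    end

    lookup.([:object])    # => "hit objects table"; activities table never touched
    lookup.([:activity])  # => "hit activities table"
    lookup.([])           # => nil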
--- .../activity_pub/object_validators/common_validations.ex | 6 +++++- .../web/activity_pub/object_validators/delete_validator.ex | 5 ++++- .../activity_pub/object_validators/emoji_react_validator.ex | 2 +- .../web/activity_pub/object_validators/like_validator.ex | 2 +- .../web/activity_pub/object_validators/undo_validator.ex | 2 +- 5 files changed, 12 insertions(+), 5 deletions(-) diff --git a/lib/pleroma/web/activity_pub/object_validators/common_validations.ex b/lib/pleroma/web/activity_pub/object_validators/common_validations.ex index be5074348..f28cdca92 100644 --- a/lib/pleroma/web/activity_pub/object_validators/common_validations.ex +++ b/lib/pleroma/web/activity_pub/object_validators/common_validations.ex @@ -54,10 +54,14 @@ def validate_actor_presence(cng, options \\ []) do def validate_object_presence(cng, options \\ []) do field_name = Keyword.get(options, :field_name, :object) allowed_types = Keyword.get(options, :allowed_types, false) + allowed_categories = Keyword.get(options, :allowed_object_categories, [:object, :activity]) cng |> validate_change(field_name, fn field_name, object_id -> - object = Object.get_cached_by_ap_id(object_id) || Activity.get_by_ap_id(object_id) + object = + (:object in allowed_categories && Object.get_cached_by_ap_id(object_id)) || + (:activity in allowed_categories && Activity.get_by_ap_id(object_id)) || + nil cond do !object -> diff --git a/lib/pleroma/web/activity_pub/object_validators/delete_validator.ex b/lib/pleroma/web/activity_pub/object_validators/delete_validator.ex index a08e8ebe0..2dcb9a5d6 100644 --- a/lib/pleroma/web/activity_pub/object_validators/delete_validator.ex +++ b/lib/pleroma/web/activity_pub/object_validators/delete_validator.ex @@ -61,7 +61,10 @@ defp validate_data(cng) do |> validate_inclusion(:type, ["Delete"]) |> validate_delete_actor(:actor) |> validate_modification_rights() - |> validate_object_or_user_presence(allowed_types: @deletable_types) + |> validate_object_or_user_presence( + allowed_types: @deletable_types, + allowed_object_categories: [:object] + ) |> add_deleted_activity_id() end diff --git a/lib/pleroma/web/activity_pub/object_validators/emoji_react_validator.ex b/lib/pleroma/web/activity_pub/object_validators/emoji_react_validator.ex index bda67feee..9cafeeb14 100644 --- a/lib/pleroma/web/activity_pub/object_validators/emoji_react_validator.ex +++ b/lib/pleroma/web/activity_pub/object_validators/emoji_react_validator.ex @@ -129,7 +129,7 @@ defp validate_data(data_cng) do |> validate_inclusion(:type, ["EmojiReact"]) |> validate_required([:id, :type, :object, :actor, :context, :to, :cc, :content]) |> validate_actor_presence() - |> validate_object_presence() + |> validate_object_presence(allowed_object_categories: [:object]) |> validate_emoji() |> maybe_validate_tag_presence() end diff --git a/lib/pleroma/web/activity_pub/object_validators/like_validator.ex b/lib/pleroma/web/activity_pub/object_validators/like_validator.ex index 35e000d72..44bb0c238 100644 --- a/lib/pleroma/web/activity_pub/object_validators/like_validator.ex +++ b/lib/pleroma/web/activity_pub/object_validators/like_validator.ex @@ -66,7 +66,7 @@ defp validate_data(data_cng) do |> validate_inclusion(:type, ["Like"]) |> validate_required([:id, :type, :object, :actor, :context, :to, :cc]) |> validate_actor_presence() - |> validate_object_presence() + |> validate_object_presence(allowed_object_categories: [:object]) |> validate_existing_like() end diff --git a/lib/pleroma/web/activity_pub/object_validators/undo_validator.ex
b/lib/pleroma/web/activity_pub/object_validators/undo_validator.ex index 703643e3f..06516f6c7 100644 --- a/lib/pleroma/web/activity_pub/object_validators/undo_validator.ex +++ b/lib/pleroma/web/activity_pub/object_validators/undo_validator.ex @@ -44,7 +44,7 @@ defp validate_data(data_cng) do |> validate_inclusion(:type, ["Undo"]) |> validate_required([:id, :type, :object, :actor, :to, :cc]) |> validate_undo_actor(:actor) - |> validate_object_presence() + |> validate_object_presence(allowed_object_categories: [:activity]) |> validate_undo_rights() end From 92bf93a4f7b29273f3985898c8393d7540b67935 Mon Sep 17 00:00:00 2001 From: Oneric Date: Wed, 18 Dec 2024 01:21:56 +0100 Subject: [PATCH 27/37] transmogrifier: avoid crashes on non-validation Delete errors Happens e.g. for duplicated Deletes. The remaining tombstone object no longer has an actor, leading to an error response during side-effect handling. --- lib/pleroma/web/activity_pub/transmogrifier.ex | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/pleroma/web/activity_pub/transmogrifier.ex b/lib/pleroma/web/activity_pub/transmogrifier.ex index e75542eb4..8cbfe4ebf 100644 --- a/lib/pleroma/web/activity_pub/transmogrifier.ex +++ b/lib/pleroma/web/activity_pub/transmogrifier.ex @@ -572,6 +572,12 @@ defp handle_incoming_normalised( else _ -> e end + + {:error, _} = e -> + e + + e -> + {:error, e} end end From ac2327c8fc6201953b1acf4efb70ef952a5444cf Mon Sep 17 00:00:00 2001 From: Oneric Date: Wed, 18 Dec 2024 01:27:32 +0100 Subject: [PATCH 28/37] transmogrifier: be more selective about Delete retry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If something else renders the Delete invalid, there’s no point in retrying anyway --- .../web/activity_pub/transmogrifier.ex | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/lib/pleroma/web/activity_pub/transmogrifier.ex b/lib/pleroma/web/activity_pub/transmogrifier.ex index 8cbfe4ebf..973f950fb 100644 --- a/lib/pleroma/web/activity_pub/transmogrifier.ex +++ b/lib/pleroma/web/activity_pub/transmogrifier.ex @@ -560,17 +560,23 @@ defp handle_incoming_normalised( Pipeline.common_pipeline(data, local: false) do {:ok, activity} else - {:error, {:validate, _}} = e -> - # Check if we have a create activity for this - with {:ok, object_id} <- ObjectValidators.ObjectID.cast(data["object"]), - %Activity{data: %{"actor" => actor}} <- - Activity.create_by_object_ap_id(object_id) |> Repo.one(), - # We have one, insert a tombstone and retry - {:ok, tombstone_data, _} <- Builder.tombstone(actor, object_id), - {:ok, _tombstone} <- Object.create(tombstone_data) do - handle_incoming(data) + {:error, {:validate, {:error, %Ecto.Changeset{errors: errors}}}} = e -> + if errors[:object] == {"can't find object", []} do + # Check if we have a create activity for this + # (e.g.
from a db prune without --prune-activities) + # We'd still like to process side effects so insert a tombstone and retry + with {:ok, object_id} <- ObjectValidators.ObjectID.cast(data["object"]), + %Activity{data: %{"actor" => actor}} <- + Activity.create_by_object_ap_id(object_id) |> Repo.one(), + # We have one, insert a tombstone and retry + {:ok, tombstone_data, _} <- Builder.tombstone(actor, object_id), + {:ok, _tombstone} <- Object.create(tombstone_data) do + handle_incoming(data) + else + _ -> e + end else - _ -> e + e end {:error, _} = e -> From cd8e6a4235fe96359e03f7251e5dc874ce930577 Mon Sep 17 00:00:00 2001 From: Oneric Date: Wed, 18 Dec 2024 01:34:33 +0100 Subject: [PATCH 29/37] transmogrifier: gracefully ignore duplicated object deletes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The object lookup is later repeated in the validator, but due to caching shouldn't incur any noticeable performance impact. It’s actually preferable to check here, since it avoids the otherwise occurring user lookup and overhead from starting and aborting a transaction. --- lib/pleroma/object.ex | 5 +++++ lib/pleroma/web/activity_pub/transmogrifier.ex | 18 ++++++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/lib/pleroma/object.ex b/lib/pleroma/object.ex index 5d84bb286..40c1534b9 100644 --- a/lib/pleroma/object.ex +++ b/lib/pleroma/object.ex @@ -215,6 +215,11 @@ def get_cached_by_ap_id(ap_id) do end end + # Intentionally accepts non-Object arguments! + @spec is_tombstone_object?(term()) :: boolean() + def is_tombstone_object?(%Object{data: %{"type" => "Tombstone"}}), do: true + def is_tombstone_object?(_), do: false + def make_tombstone(%Object{data: %{"id" => id, "type" => type}}, deleted \\ DateTime.utc_now()) do %ObjectTombstone{ id: id, diff --git a/lib/pleroma/web/activity_pub/transmogrifier.ex b/lib/pleroma/web/activity_pub/transmogrifier.ex index 973f950fb..01f39e985 100644 --- a/lib/pleroma/web/activity_pub/transmogrifier.ex +++ b/lib/pleroma/web/activity_pub/transmogrifier.ex @@ -556,19 +556,29 @@ defp handle_incoming_normalised( %{"type" => "Delete"} = data, _options ) do - with {:ok, activity, _} <- - Pipeline.common_pipeline(data, local: false) do + oid_result = ObjectValidators.ObjectID.cast(data["object"]) + + with {_, {:ok, object_id}} <- {:object_id, oid_result}, + object <- Object.get_cached_by_ap_id(object_id), + {_, false} <- {:tombstone, Object.is_tombstone_object?(object) && !data["actor"]}, + {:ok, activity, _} <- Pipeline.common_pipeline(data, local: false) do {:ok, activity} else + {:object_id, _} -> + {:error, {:validate, "Invalid object id: #{data["object"]}"}} + + {:tombstone, true} -> + {:error, :ignore} + {:error, {:validate, {:error, %Ecto.Changeset{errors: errors}}}} = e -> if errors[:object] == {"can't find object", []} do # Check if we have a create activity for this # (e.g.
from a db prune without --prune-activities) # We'd still like to process side effects so insert a fake tombstone and retry # (real tombstones from Object.delete do not have an actor field) with {:ok, object_id} <- ObjectValidators.ObjectID.cast(data["object"]), %Activity{data: %{"actor" => actor}} <- Activity.create_by_object_ap_id(object_id) |> Repo.one(), {:ok, tombstone_data, _} <- Builder.tombstone(actor, object_id), {:ok, _tombstone} <- Object.create(tombstone_data) do handle_incoming(data) else _ -> e end else e end {:error, _} = e -> From 2ddff7e3860cd0076a6d9a881e6d747835247922 Mon Sep 17 00:00:00 2001 From: Oneric Date: Wed, 18 Dec 2024 21:55:32 +0100 Subject: [PATCH 30/37] transmogrifier: gracefully ignore Delete of unknown objects It's quite common to receive spurious Deletes, so we neither want to waste resources on retrying nor spam "invalid AP" logs --- lib/pleroma/web/activity_pub/transmogrifier.ex | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/pleroma/web/activity_pub/transmogrifier.ex b/lib/pleroma/web/activity_pub/transmogrifier.ex index 01f39e985..6e6244b7c 100644 --- a/lib/pleroma/web/activity_pub/transmogrifier.ex +++ b/lib/pleroma/web/activity_pub/transmogrifier.ex @@ -577,12 +577,13 @@ defp handle_incoming_normalised( # We'd still like to process side effects so insert a fake tombstone and retry # (real tombstones from Object.delete do not have an actor field) with {:ok, object_id} <- ObjectValidators.ObjectID.cast(data["object"]), - %Activity{data: %{"actor" => actor}} <- - Activity.create_by_object_ap_id(object_id) |> Repo.one(), + {_, %Activity{data: %{"actor" => actor}}} <- + {:create, Activity.create_by_object_ap_id(object_id) |> Repo.one()}, {:ok, tombstone_data, _} <- Builder.tombstone(actor, object_id), {:ok, _tombstone} <- Object.create(tombstone_data) do handle_incoming(data) else + {:create, _} -> {:error, :ignore} _ -> e end else From 8fa51700d47b8375fd31f60b3d958ab1bb7da8aa Mon Sep 17 00:00:00 2001 From: Oneric Date: Sat, 21 Dec 2024 16:23:06 +0100 Subject: [PATCH 31/37] changelog: summarise preceding perf tweaks --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ced89d2b3..6172134e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- Dropped obsolete `ap_enabled` indicator from user table and associated buggy logic - The remote user count in prometheus metrics is now an estimate instead of an exact number since the latter proved unreasonably costly to obtain for a merely nice-to-have statistic +- Various other tweaks improving stat query performance and avoiding unnecessary work on received AP documents ## 2025.01.01 From fb3de8045a354fcb189a66b4155508ed9c870b69 Mon Sep 17 00:00:00 2001 From: Oneric Date: Tue, 7 Jan 2025 02:27:45 +0100 Subject: [PATCH 32/37] Expose Port IO stats via Prometheus --- lib/pleroma/web/telemetry.ex | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/lib/pleroma/web/telemetry.ex b/lib/pleroma/web/telemetry.ex index 0ddb2c903..8d43e366f 100644 --- a/lib/pleroma/web/telemetry.ex +++ b/lib/pleroma/web/telemetry.ex @@ -208,8 +208,10 @@ defp summary_fallback_metrics(byte_unit \\ :byte) do dist_metrics ++ vm_metrics end - defp common_metrics do + defp common_metrics(byte_unit \\ :byte) do [ + last_value("vm.portio.in.total", unit: {:byte, byte_unit}), + last_value("vm.portio.out.total", unit: {:byte, byte_unit}), last_value("pleroma.local_users.total"), last_value("pleroma.domains.total"), last_value("pleroma.local_statuses.total"), @@ -220,14 +222,22 @@ defp common_metrics do def prometheus_metrics, do: common_metrics() ++ distribution_metrics() ++ summary_fallback_metrics() - def live_dashboard_metrics, do: common_metrics() ++ summary_metrics(:megabyte) + def live_dashboard_metrics, do: common_metrics(:megabyte) ++ summary_metrics(:megabyte) defp periodic_measurements do [ + {__MODULE__, :io_stats, []}, {__MODULE__, :instance_stats, []} ] end + def io_stats do + # All IO done via erlang ports, i.e. mostly network but also e.g. fasthtml_workers. NOT disk IO! + {{:input, input}, {:output, output}} = :erlang.statistics(:io) + :telemetry.execute([:vm, :portio, :in], %{total: input}, %{}) + :telemetry.execute([:vm, :portio, :out], %{total: output}, %{}) + end + def instance_stats do stats = Stats.get_stats() :telemetry.execute([:pleroma, :local_users], %{total: stats.user_count}, %{}) From 4701aa2a386d88707f5d6232203aed6b959b52db Mon Sep 17 00:00:00 2001 From: Oneric Date: Sun, 15 Dec 2024 02:50:20 +0100 Subject: [PATCH 33/37] receiver_worker: log process crashes Oban catches crashes to handle job failure and retry, thus they never bubble up all the way and nothing is logged by default. For better debugging, catch and log any crashes.
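The mechanism in isolation, as a sketch (`do_work/1` is a placeholder):

    def perform(job) do
      do_work(job)
    rescue
      err ->
        # log the crash together with its stacktrace ...
        Logger.error(Exception.format(:error, err, __STACKTRACE__))
        # ... then reraise with the original stacktrace so Oban's own
        # failure handling still sees the unmodified exception
        reraise err, __STACKTRACE__
    end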
--- lib/pleroma/workers/receiver_worker.ex | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/pleroma/workers/receiver_worker.ex b/lib/pleroma/workers/receiver_worker.ex index 16241a75f..13493ec8b 100644 --- a/lib/pleroma/workers/receiver_worker.ex +++ b/lib/pleroma/workers/receiver_worker.ex @@ -50,5 +50,13 @@ def perform(%Job{args: %{"op" => "incoming_ap_doc", "params" => params}}) do Logger.error("Unexpected AP doc error: (raw) #{inspect(e)} from #{inspect(params)}") {:error, e} end + rescue + err -> + Logger.error( + "Receiver worker CRASH on #{inspect(params)} with: #{Exception.format(:error, err, __STACKTRACE__)}" + ) + + # reraise to let oban handle transaction conflicts without deducting an attempt + reraise err, __STACKTRACE__ end end From 46148c082522bcb4b4c87e0f7a5b3800269cc20c Mon Sep 17 00:00:00 2001 From: Oneric Date: Tue, 7 Jan 2025 04:10:58 +0100 Subject: [PATCH 34/37] Don't return garbage on failed collection fetches And for now treat partial fetches as a success, since for all current users partial collection data is better than no data at all. If an error occurred while fetching a page, this previously returned a bogus {:ok, {:error, _}} success, causing the error to be attached to the object as a reply list, subsequently leading to the whole post getting rejected during validation. Also the pinned collection caller did not actually handle the preexisting error case, resulting in process crashes. --- lib/pleroma/collections/fetcher.ex | 25 +++++++++++++------- lib/pleroma/web/activity_pub/activity_pub.ex | 22 ++++++++++------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/lib/pleroma/collections/fetcher.ex b/lib/pleroma/collections/fetcher.ex index 9ab883cc2..a4c3463a2 100644 --- a/lib/pleroma/collections/fetcher.ex +++ b/lib/pleroma/collections/fetcher.ex @@ -14,7 +14,7 @@ defmodule Akkoma.Collections.Fetcher do @spec fetch_collection(String.t() | map()) :: {:ok, [Pleroma.Object.t()]} | {:error, any()} def fetch_collection(ap_id) when is_binary(ap_id) do with {:ok, page} <- Fetcher.fetch_and_contain_remote_object_from_id(ap_id) do - {:ok, objects_from_collection(page)} + partial_as_success(objects_from_collection(page)) else e -> Logger.error("Could not fetch collection #{ap_id} - #{inspect(e)}") @@ -24,9 +24,12 @@ def fetch_collection(ap_id) when is_binary(ap_id) do def fetch_collection(%{"type" => type} = page) when type in ["Collection", "OrderedCollection", "CollectionPage", "OrderedCollectionPage"] do - {:ok, objects_from_collection(page)} + partial_as_success(objects_from_collection(page)) end + defp partial_as_success({:partial, items}), do: {:ok, items} + defp partial_as_success(res), do: res + defp items_in_page(%{"type" => type, "orderedItems" => items}) when is_list(items) and type in ["OrderedCollection", "OrderedCollectionPage"], do: items @@ -53,11 +56,11 @@ defp objects_from_collection(%{"type" => type, "first" => %{"id" => id}}) fetch_page_items(id) end - defp objects_from_collection(_page), do: [] + defp objects_from_collection(_page), do: {:ok, []} defp fetch_page_items(id, items \\ []) do if Enum.count(items) >= Config.get([:activitypub, :max_collection_objects]) do - items + {:ok, items} else with {:ok, page} <- Fetcher.fetch_and_contain_remote_object_from_id(id) do objects = items_in_page(page) if Enum.count(objects) > 0 do maybe_next_page(page, items ++ objects) else - items + {:ok, items} end else {:error, :not_found} -> - items + {:ok, items} {:error,
:forbidden} -> - items + {:ok, items} {:error, error} -> Logger.error("Could not fetch page #{id} - #{inspect(error)}") - {:error, error} + + case items do + [] -> {:error, error} + _ -> {:partial, items} + end end end end @@ -85,5 +92,5 @@ defp maybe_next_page(%{"next" => id}, items) when is_binary(id) do fetch_page_items(id, items) end - defp maybe_next_page(_, items), do: items + defp maybe_next_page(_, items), do: {:ok, items} end diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex index 224767b80..746d18639 100644 --- a/lib/pleroma/web/activity_pub/activity_pub.ex +++ b/lib/pleroma/web/activity_pub/activity_pub.ex @@ -1797,7 +1797,7 @@ def pin_data_from_featured_collection(%{ else e -> Logger.error("Could not decode featured collection at fetch #{first}, #{inspect(e)}") - {:ok, %{}} + %{} end end @@ -1807,14 +1807,18 @@ def pin_data_from_featured_collection( } = collection ) when type in ["OrderedCollection", "Collection"] do - {:ok, objects} = Collections.Fetcher.fetch_collection(collection) - - # Items can either be a map _or_ a string - objects - |> Map.new(fn - ap_id when is_binary(ap_id) -> {ap_id, NaiveDateTime.utc_now()} - %{"id" => object_ap_id} -> {object_ap_id, NaiveDateTime.utc_now()} - end) + with {:ok, objects} <- Collections.Fetcher.fetch_collection(collection) do + # Items can either be a map _or_ a string + objects + |> Map.new(fn + ap_id when is_binary(ap_id) -> {ap_id, NaiveDateTime.utc_now()} + %{"id" => object_ap_id} -> {object_ap_id, NaiveDateTime.utc_now()} + end) + else + e -> + Logger.warning("Failed to fetch featured collection #{inspect(collection)}, #{inspect(e)}") + %{} + end end def pin_data_from_featured_collection(obj) do From 1b09b9fc22d6f168f3ee28fc4441282e4f7ba917 Mon Sep 17 00:00:00 2001 From: Oneric Date: Thu, 9 Jan 2025 20:40:59 +0100 Subject: [PATCH 35/37] static_fe: fix HTML quotation for upload alt text Reported by riley on IRC --- .../web/templates/static_fe/static_fe/_attachment.html.eex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pleroma/web/templates/static_fe/static_fe/_attachment.html.eex b/lib/pleroma/web/templates/static_fe/static_fe/_attachment.html.eex index f5bbe9a07..e58af2888 100644 --- a/lib/pleroma/web/templates/static_fe/static_fe/_attachment.html.eex +++ b/lib/pleroma/web/templates/static_fe/static_fe/_attachment.html.eex @@ -1,4 +1,4 @@ -" title="<%= @name %>"> +
<%= gettext("Hover to show content") %>
From a1c841a122382224d6752c4b4c2c8902bd1f4a8b Mon Sep 17 00:00:00 2001 From: Oneric Date: Thu, 9 Jan 2025 21:00:18 +0100 Subject: [PATCH 36/37] federation.md: list FEP-dc88 formatting mathematics Implemented by https://akkoma.dev/AkkomaGang/akkoma/pulls/642 --- FEDERATION.md | 1 + 1 file changed, 1 insertion(+) diff --git a/FEDERATION.md b/FEDERATION.md index 4524ec4e2..93eff05ca 100644 --- a/FEDERATION.md +++ b/FEDERATION.md @@ -10,6 +10,7 @@ ## Supported FEPs - [FEP-67ff: FEDERATION](https://codeberg.org/fediverse/fep/src/branch/main/fep/67ff/fep-67ff.md) +- [FEP-dc88: Formatting Mathematics](https://codeberg.org/fediverse/fep/src/branch/main/fep/dc88/fep-dc88.md) - [FEP-f1d5: NodeInfo in Fediverse Software](https://codeberg.org/fediverse/fep/src/branch/main/fep/f1d5/fep-f1d5.md) - [FEP-fffd: Proxy Objects](https://codeberg.org/fediverse/fep/src/branch/main/fep/fffd/fep-fffd.md) From f0a99b4595f11c8415155ffa32d4653db11c7bae Mon Sep 17 00:00:00 2001 From: Oneric Date: Mon, 10 Feb 2025 04:40:51 +0100 Subject: [PATCH 37/37] article_note_validator: fix handling of Mastodon-style replies collections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The first collection page is (sometimes?) inlined, which caused crashes when attempting to log the fetch failure. But there’s no need to fetch and we can treat it like the other inline collection. --- .../object_validators/article_note_page_validator.ex | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/pleroma/web/activity_pub/object_validators/article_note_page_validator.ex b/lib/pleroma/web/activity_pub/object_validators/article_note_page_validator.ex index d1cd496db..e52986502 100644 --- a/lib/pleroma/web/activity_pub/object_validators/article_note_page_validator.ex +++ b/lib/pleroma/web/activity_pub/object_validators/article_note_page_validator.ex @@ -71,7 +71,7 @@ defp fix_tag(data), do: Map.drop(data, ["tag"]) defp fix_replies(%{"replies" => replies} = data) when is_list(replies), do: data - defp fix_replies(%{"replies" => %{"first" => first}} = data) do + defp fix_replies(%{"replies" => %{"first" => first}} = data) when is_binary(first) do with {:ok, replies} <- Akkoma.Collections.Fetcher.fetch_collection(first) do Map.put(data, "replies", replies) else @@ -81,6 +81,10 @@ defp fix_replies(%{"replies" => %{"first" => first}} = data) do end end + defp fix_replies(%{"replies" => %{"first" => %{"items" => replies}}} = data) + when is_list(replies), + do: Map.put(data, "replies", replies) + defp fix_replies(%{"replies" => %{"items" => replies}} = data) when is_list(replies), do: Map.put(data, "replies", replies)
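For illustration, the shape of a Mastodon-style replies collection with an inlined first page that the new clause matches (abridged; ids and values are made up):

    %{
      "replies" => %{
        "type" => "Collection",
        "first" => %{
          "type" => "CollectionPage",
          "items" => [
            "https://example.com/users/a/statuses/1",
            "https://example.com/users/b/statuses/2"
          ],
          "next" => "https://example.com/objects/0/replies?page=2"
        }
      }
    }

When "first" is a string the existing fetch path is used; when it is an inlined page like the above, the items are now lifted out directly instead of attempting a fetch and then crashing while logging its failure.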