Add backfilling of posts #846
|
@ -19,7 +19,7 @@ def cast(object) when is_binary(object) do
|
|||
|
||||
def cast(%{"id" => object}), do: cast(object)
|
||||
|
||||
def cast(_), do: :error
|
||||
def cast(o), do: {:error, o}
|
||||
tudbut marked this conversation as resolved
Outdated
|
||||
|
||||
def dump(data), do: {:ok, data}
|
||||
|
||||
|
|
|
@ -152,6 +152,7 @@ defmodule Pleroma.User do
|
|||
field(:also_known_as, {:array, ObjectValidators.ObjectID}, default: [])
|
||||
field(:inbox, :string)
|
||||
field(:shared_inbox, :string)
|
||||
field(:outbox, :string, default: nil)
|
||||
field(:last_active_at, :naive_datetime)
|
||||
field(:disclose_client, :boolean, default: true)
|
||||
field(:pinned_objects, :map, default: %{})
|
||||
|
@ -469,6 +470,7 @@ def remote_user_changeset(struct \\ %User{local: false}, params) do
|
|||
:ap_id,
|
||||
:inbox,
|
||||
:shared_inbox,
|
||||
:outbox,
|
||||
:nickname,
|
||||
:avatar,
|
||||
:ap_enabled,
|
||||
|
|
|
@ -1587,7 +1587,8 @@ defp object_to_user_data(data, additional) do
|
|||
actor_type = data["type"] || "Person"
|
||||
|
||||
featured_address = data["featured"]
|
||||
{:ok, pinned_objects} = fetch_and_prepare_featured_from_ap_id(featured_address)
|
||||
{:ok, pinned_objects} = fetch_and_prepare_outbox_from_ap_id(featured_address)
|
||||
outbox_address = data["outbox"]
|
||||
|
||||
# first, check that the owner is correct
|
||||
signing_key =
|
||||
|
@ -1644,6 +1645,7 @@ defp object_to_user_data(data, additional) do
|
|||
also_known_as: also_known_as,
|
||||
signing_key: signing_key,
|
||||
inbox: data["inbox"],
|
||||
outbox: outbox_address,
|
||||
shared_inbox: shared_inbox,
|
||||
pinned_objects: pinned_objects,
|
||||
nickname: nickname
|
||||
|
@ -1790,7 +1792,7 @@ def maybe_handle_clashing_nickname(data) do
|
|||
end
|
||||
end
|
||||
|
||||
def pin_data_from_featured_collection(%{
|
||||
def activity_data_from_outbox_collection(%{
|
||||
"type" => "OrderedCollection",
|
||||
"first" => first
|
||||
}) do
|
||||
|
@ -1805,7 +1807,7 @@ def pin_data_from_featured_collection(%{
|
|||
end
|
||||
end
|
||||
|
||||
def pin_data_from_featured_collection(
|
||||
def activity_data_from_outbox_collection(
|
||||
%{
|
||||
"type" => type
|
||||
} = collection
|
||||
|
@ -1821,21 +1823,23 @@ def pin_data_from_featured_collection(
|
|||
end)
|
||||
end
|
||||
|
||||
def pin_data_from_featured_collection(obj) do
|
||||
Logger.error("Could not parse featured collection #{inspect(obj)}")
|
||||
def activity_data_from_outbox_collection(obj) do
|
||||
Logger.error("Could not parse outbox collection #{inspect(obj)}")
|
||||
%{}
|
||||
end
|
||||
|
||||
def fetch_and_prepare_featured_from_ap_id(nil) do
|
||||
def fetch_and_prepare_outbox_from_ap_id(nil) do
|
||||
{:ok, %{}}
|
||||
end
|
||||
|
||||
def fetch_and_prepare_featured_from_ap_id(ap_id) do
|
||||
def fetch_and_prepare_outbox_from_ap_id(ap_id) do
|
||||
Logger.info("Fetching outbox #{ap_id}")
|
||||
|
||||
with {:ok, data} <- Fetcher.fetch_and_contain_remote_object_from_id(ap_id) do
|
||||
{:ok, pin_data_from_featured_collection(data)}
|
||||
{:ok, activity_data_from_outbox_collection(data)}
|
||||
else
|
||||
e ->
|
||||
Logger.error("Could not decode featured collection at fetch #{ap_id}, #{inspect(e)}")
|
||||
Logger.error("Could not decode outbox collection at fetch #{ap_id}, #{inspect(e)}")
|
||||
{:ok, %{}}
|
||||
end
|
||||
end
|
||||
|
@ -1854,6 +1858,19 @@ def enqueue_pin_fetches(%{pinned_objects: pins}) do
|
|||
|
||||
def enqueue_pin_fetches(_), do: nil
|
||||
|
||||
def enqueue_outbox_fetches(%{outbox: outbox_address}) do
|
||||
# enqueue a task to fetch the outbox
|
||||
Logger.debug("Refetching outbox #{outbox_address}")
|
||||
|
||||
Pleroma.Workers.RemoteFetcherWorker.enqueue("fetch_outbox", %{
|
||||
"id" => outbox_address
|
||||
})
|
||||
|
||||
:ok
|
||||
end
|
||||
|
||||
def enqueue_outbox_fetches(_), do: :error
|
||||
|
||||
def make_user_from_ap_id(ap_id, additional \\ []) do
|
||||
user = User.get_cached_by_ap_id(ap_id)
|
||||
|
||||
|
|
|
@ -637,7 +637,10 @@ defp handle_incoming_normalised(
|
|||
end
|
||||
end
|
||||
|
||||
defp handle_incoming_normalised(_, _), do: :error
|
||||
defp handle_incoming_normalised(%{"object" => o}, options),
|
||||
do: handle_incoming_normalised(o, options)
|
||||
|
||||
defp handle_incoming_normalised(o, _), do: {:error, {:unknown_object_class, o}}
|
||||
tudbut marked this conversation as resolved
Outdated
Oneric
commented
Just processing the referenced object if we don't understand the activity is imo a bad idea and likely to break stuff: e.g. consider a malformed Delete which might now end up actually inserting the object instead of deleting it similarly the changed return signature from Just processing the referenced object if we don't understand the activity is imo a bad idea and likely to break stuff: e.g. consider a malformed Delete which might now end up actually inserting the object instead of deleting it
It can also end up breaking #845 *(which i also haven’t reviewed yet)*
similarly the changed return signature from `:error` to a tuple `{:error, {.., ..}` needs some attention to check whether anything is affected by this
tudbut
commented
the issue is that when fetching other things (e.g. pins, we get objects directly. akkoma just wraps things in an activity no matter what, so we get an activity that's inside an activity) the issue is that when fetching other things (e.g. pins, we get objects directly. akkoma just wraps things in an activity no matter what, so we get an activity that's inside an activity)
tudbut
commented
this arises from https://akkoma.dev/AkkomaGang/akkoma/src/branch/develop/lib/pleroma/object/fetcher.ex#L150 where we wrap everything in a fake activity we can modify said fake create activity to have some sort of indication that it is fake this arises from https://akkoma.dev/AkkomaGang/akkoma/src/branch/develop/lib/pleroma/object/fetcher.ex#L150 where we wrap everything in a fake activity
we can modify said fake create activity to have some sort of indication that it is fake
tudbut
commented
adding such an indication in the next commit adding such an indication in the next commit
Oneric
commented
I don't like yet another "fake activity" marker (We already have one) with different semantics. Instead, the fetch logic should be changed to only wrap objects which actually need this Both this and #845 could use a new I don't like yet another "fake activity" marker *(We already have one)* with different semantics. Instead, the fetch logic should be changed to only wrap objects which actually need this
Both this and #845 could use a new `fetchable_objects` constant
tudbut
commented
Where? I couldn't find it personally, but I'm also pretty new to the codebase > We already have one?
Where? I couldn't find it personally, but I'm also pretty new to the codebase
Oneric
commented
Multiple actually:
and also a Furthermore, Fortunately for what you’re trying to do here, it’s easy enough to just not create unecessary activities in the first place, avoiding all of this. Multiple actually:
```
"pleroma:fake_object_id"
"pleroma:fakeid"
"pleroma:fakecontext"
```
and also a `"fake" => true` property sometimes.
Furthermore, `Create` activities for fetched remote posts have a `null` id since a few years or so *(and fixing a fallout from this keeps getting defered due to priority and time, see https://akkoma.dev/AkkomaGang/akkoma/issues/712 )*
The need to deal with them in al subsequent places makes things messy and imo they ideally shouldn't exist at all.
Unfortunately some of this is hard to fix due to fundamental design choices from early days *(e.g. requiring the presence of a `Create` activity for all viewable objects)*.
Fortunately for what you’re trying to do here, it’s easy enough to just not create unecessary activities in the first place, avoiding all of this.
tudbut
commented
Wouldn't I need to completely rewrite the function around https://akkoma.dev/AkkomaGang/akkoma/src/branch/develop/lib/pleroma/object/fetcher.ex#L150 for that? Or at the very least completely bypass the function? I can definitely switch to using "fake" => true or something. Or I can check if the ID is null > Fortunately for what you’re trying to do here, it’s easy enough to just not create unecessary activities in the first place, avoiding all of this.
Wouldn't I need to completely rewrite the function around https://akkoma.dev/AkkomaGang/akkoma/src/branch/develop/lib/pleroma/object/fetcher.ex#L150 for that? Or at the very least completely bypass the function?
I can definitely switch to using "fake" => true or something. Or I can check if the ID is null
tudbut
commented
I just realized this can actually be done relatively easily and will be implementing it now > Instead, the fetch logic should be changed to only wrap objects which actually need this
I just realized this can actually be done relatively easily and will be implementing it now
|
||||
|
||||
@spec get_obj_helper(String.t(), Keyword.t()) :: {:ok, Object.t()} | nil
|
||||
def get_obj_helper(id, options \\ []) do
|
||||
|
|
|
@ -306,6 +306,8 @@ def show(%{assigns: %{user: for_user}} = conn, %{id: nickname_or_id} = params) d
|
|||
def statuses(%{assigns: %{user: reading_user}} = conn, params) do
|
||||
with %User{} = user <- User.get_cached_by_nickname_or_id(params.id, for: reading_user),
|
||||
:visible <- User.visible_for(user, reading_user) do
|
||||
ActivityPub.enqueue_outbox_fetches(user)
|
||||
Oneric
commented
imho this should be limited to, or there should at least be an option to limit this to, authenticated requests only. Processing new posts entailing pulling in their whole threads, new refeenced users and their Also no need to enquee fetched if the viewed profile is a local user Furthermore, there prob should be a timeout (e.g. 60 min) so we don't constantly spam outbox fetches for popular remote users imho this should be limited to, or there should at least be an option to limit this to, authenticated requests only. Processing new posts entailing pulling in their whole threads, new refeenced users and their ~~pinned~~ *(now generally latest)* posts and their threads and so on can become quite costly
Also no need to enquee fetched if the viewed profile is a local user
Furthermore, there prob should be a timeout *(e.g. 60 min)* so we don't constantly spam outbox fetches for popular remote users
tudbut
commented
we do always fetch the outbox, but activities in it are still cached. a timeout may make sense either way. local users never get an outbox address property, its only added to the user object when its a remote one. so technically we're already not fetching local outboxes. making that more explicit makes sense tho we do always fetch the outbox, but activities in it are still cached. a timeout may make sense either way.
local users never get an outbox address property, its only added to the user object when its a remote one. so technically we're already not fetching local outboxes. making that more explicit makes sense tho
Oneric
commented
This is insufficent, since:
And also doesn’t address the limitation to authenticated users only. As I mentioned this can be costly and I don’t want unauthenticated users browsing user profiles to ever be able to create such a significant workload for the instance for profiles actual local users might never even look at. (Letting unauthenticated viewers trigger this also exacerbates the request spam issue) > we do always fetch the outbox, but activities in it are still cached.
This is insufficent, since:
- it still spams remote servers with many requests *(due to default uniqueness job constraint: 1 request per second and per remote user)*; this is not nice on remote servers and might aalso end up creating federation problems for us if we get timeouted due to it
- it creates unecessary Oban job churn *(yes this can matter for perf and also increases PostgreSQL bloat from deleted and not-yet-vacuumed rows; more on that in a future PR once i get to finish what i have on disk)*
And also doesn’t address the limitation to authenticated users only. As I mentioned this can be costly and I don’t want unauthenticated users browsing user profiles to ever be able to create such a significant workload for the instance for profiles actual local users might never even look at. *(Letting unauthenticated viewers trigger this also exacerbates the request spam issue)*
tudbut
commented
I've made it only fetch once per minute per user, and ignore local users. If one minute is too small, I propose making this a setting, though I'm not familiar with how to do that at this point. 10 mins should also be fine to hard-code, but an hour feels a little much. I've made it only fetch once per minute per user, and ignore local users. If one minute is too small, I propose making this a setting, though I'm not familiar with how to do that at this point. 10 mins should also be fine to hard-code, but an hour feels a little much.
Oneric
commented
The timeout mechanism seems like it should work and uses mechansims you’re familiar with To add the restriction for logged-in users, see e.g. If this is too much, we can also adjust those bits later before merge (though it might take a while) The timeout mechanism seems like it should work and uses mechansims you’re familiar with
Though this much additional DB traffic to frequently update timestamp might *(untested)* negatively affect overall perf. Given the cooldown period is well within typicall continuous-uptime, I was thinking of just using a runtime-only hint in caches; see e.g. #762 for an example of a (new) `cachex` set being used for timeouts
To add the restriction for logged-in users, see e.g. `lib/pleroma/web/mastodon_api/controllers/timeline_controller.ex`’s `public/2` function
If this is too much, we can also adjust those bits later before merge *(though it might take a while)*
tudbut
commented
I have now added the cachex bit. Ill be pushing this once i have verified it still works, and will next be looking at the logged in users restriction. I have now added the cachex bit. Ill be pushing this once i have verified it still works, and will next be looking at the logged in users restriction.
tudbut
commented
Okay! Hope this is good now (no more when not logged in, cachex for timeouts). If so, I think that would be all in terms of what you requested, but let me know if I've missed anything! Okay! Hope this is good now (no more when not logged in, cachex for timeouts). If so, I think that would be all in terms of what you requested, but let me know if I've missed anything!
|
||||
|
||||
params =
|
||||
params
|
||||
|> Map.delete(:tagged)
|
||||
|
|
|
@ -4,12 +4,32 @@
|
|||
|
||||
defmodule Pleroma.Workers.RemoteFetcherWorker do
|
||||
alias Pleroma.Object.Fetcher
|
||||
alias Pleroma.Object
|
||||
alias Pleroma.Web.ActivityPub.ActivityPub
|
||||
|
||||
use Pleroma.Workers.WorkerHelper,
|
||||
queue: "remote_fetcher",
|
||||
unique: [period: 300, states: Oban.Job.states(), keys: [:op, :id]]
|
||||
|
||||
@impl Oban.Worker
|
||||
|
||||
def perform(%Job{args: %{"op" => "fetch_outbox", "id" => address}}) do
|
||||
with {:ok, outbox} <- ActivityPub.fetch_and_prepare_outbox_from_ap_id(address) do
|
||||
Enum.each(Enum.reverse(outbox), fn {ap_id, _} ->
|
||||
if is_nil(Object.get_cached_by_ap_id(ap_id)) do
|
||||
enqueue("fetch_remote", %{
|
||||
"id" => ap_id,
|
||||
"depth" => 1
|
||||
})
|
||||
end
|
||||
end)
|
||||
|
||||
:ok
|
||||
else
|
||||
e -> {:error, e}
|
||||
end
|
||||
end
|
||||
|
||||
def perform(%Job{args: %{"op" => "fetch_remote", "id" => id} = args}) do
|
||||
case Fetcher.fetch_object_from_id(id, depth: args["depth"]) do
|
||||
{:ok, _object} ->
|
||||
|
|
15
priv/repo/migrations/20241027040000_users_add_outboxes.exs
Normal file
|
@ -0,0 +1,15 @@
|
|||
defmodule Pleroma.Repo.Migrations.UsersAddOutboxes do
|
||||
use Ecto.Migration
|
||||
|
||||
def up do
|
||||
alter table(:users) do
|
||||
add_if_not_exists(:outbox, :text)
|
||||
end
|
||||
end
|
||||
|
||||
tudbut marked this conversation as resolved
Outdated
Oneric
commented
This does not generally hold This does not generally hold
tudbut
commented
im aware. but for migrating instances, no outboxes will be populated at first so this is kinda the only solution if you want previously fetched users to have the property im aware. but for migrating instances, no outboxes will be populated at first so this is kinda the only solution if you want previously fetched users to have the property
tudbut
commented
any idea how to do this more accurately? any idea how to do this more accurately?
Oneric
commented
don’t populate it and wait for the next user refresh to occur or Update activity to arrive filling outbox with the correct value don’t populate it and wait for the next user refresh to occur or Update activity to arrive filling outbox with the correct value
Oneric
commented
(also this breaks for e.g. users named "inbox") (also this breaks for e.g. users named "inbox")
|
||||
def down do
|
||||
alter table(:users) do
|
||||
remove_if_exists(:outbox, :text)
|
||||
end
|
||||
end
|
||||
end
|
is this signature change actually safe? Why is it done?
Note this is also inconsistent with the return of a prior function clause
this can be reverted. i experienced several errors that were not specific enough and doing this told me what the issue was. ill revert it if you want
reverting this in the next commit