diff --git a/config/config.exs b/config/config.exs index eb39155df..914b5db61 100644 --- a/config/config.exs +++ b/config/config.exs @@ -363,7 +363,8 @@ follow_handshake_timeout: 500, note_replies_output_limit: 5, sign_object_fetches: true, - authorized_fetch_mode: false + authorized_fetch_mode: false, + max_collection_objects: 50 config :pleroma, :streamer, workers: 3, diff --git a/config/description.exs b/config/description.exs index 9401bed5c..3777905a3 100644 --- a/config/description.exs +++ b/config/description.exs @@ -1689,6 +1689,13 @@ type: :integer, description: "Following handshake timeout", suggestions: [500] + }, + %{ + key: :max_collection_objects, + type: :integer, + description: + "The maximum number of items to fetch from a remote collection. Setting this too low can lead to only getting partial collections, but too high and you can end up fetching far too many objects.", + suggestions: [50] } ] }, diff --git a/docs/configuration/cheatsheet.md b/docs/configuration/cheatsheet.md index 3097f1190..11083e831 100644 --- a/docs/configuration/cheatsheet.md +++ b/docs/configuration/cheatsheet.md @@ -236,6 +236,7 @@ Notes: * `deny_follow_blocked`: Whether to disallow following an account that has blocked the user in question * `sign_object_fetches`: Sign object fetches with HTTP signatures * `authorized_fetch_mode`: Require HTTP signatures for AP fetches +* `max_collection_objects`: The maximum number of objects to fetch from a remote AP collection.
## Pleroma.User
diff --git a/lib/pleroma/collections/fetcher.ex b/lib/pleroma/collections/fetcher.ex
new file mode 100644
index 000000000..382defff4
--- /dev/null
+++ b/lib/pleroma/collections/fetcher.ex
@@ -0,0 +1,113 @@
+# Akkoma: The cooler fediverse server
+# Copyright © 2022- Akkoma Authors
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Akkoma.Collections.Fetcher do
+  @moduledoc """
+  ActivityPub collection fetching functions.
+
+  Flattens a `Collection` or `OrderedCollection` into a list of its items,
+  following `first`/`next` page links when the items are not inlined.
+
+  See: https://www.w3.org/TR/activitystreams-core/#paging
+  """
+  alias Pleroma.Config
+  alias Pleroma.Object.Fetcher
+
+  # Logger.error/1 is a macro, so Logger must be required before it is used.
+  require Logger
+
+  @doc """
+  Fetches the collection at `ap_id` and returns its items.
+
+  Delegates to `fetch_collection/1`.
+  """
+  def fetch_collection_by_ap_id(ap_id) when is_binary(ap_id) do
+    fetch_collection(ap_id)
+  end
+
+  @doc """
+  Returns `{:ok, items}` for a collection.
+
+  Given a binary AP id, the collection document is fetched first; if that
+  fetch does not return `{:ok, page}` the failure is logged and returned
+  unchanged. Given an already-decoded `Collection`/`OrderedCollection` map,
+  its items are extracted directly.
+  """
+  def fetch_collection(ap_id) when is_binary(ap_id) do
+    with {:ok, page} <- Fetcher.fetch_and_contain_remote_object_from_id(ap_id) do
+      {:ok, objects_from_collection(page)}
+    else
+      e ->
+        Logger.error("Could not fetch collection #{ap_id} - #{inspect(e)}")
+        e
+    end
+  end
+
+  def fetch_collection(%{"type" => type} = page)
+      when type in ["Collection", "OrderedCollection"] do
+    {:ok, objects_from_collection(page)}
+  end
+
+  # Items carried directly by a page (or by a collection acting as one).
+  defp items_in_page(%{"type" => type, "orderedItems" => items})
+       when is_list(items) and type in ["OrderedCollection", "OrderedCollectionPage"],
+       do: items
+
+  defp items_in_page(%{"type" => type, "items" => items})
+       when is_list(items) and type in ["Collection", "CollectionPage"],
+       do: items
+
+  # Anything else carries no usable items; treat it as an empty page instead of crashing.
+  defp items_in_page(_page), do: []
+
+  # Collections with inlined items are returned as-is.
+  defp objects_from_collection(%{"type" => "OrderedCollection", "orderedItems" => items})
+       when is_list(items),
+       do: items
+
+  defp objects_from_collection(%{"type" => "Collection", "items" => items}) when is_list(items),
+    do: items
+
+  # Paged collections: `first` is either the page id or an embedded page with an id.
+  defp objects_from_collection(%{"type" => type, "first" => first})
+       when is_binary(first) and type in ["Collection", "OrderedCollection"] do
+    fetch_page_items(first)
+  end
+
+  defp objects_from_collection(%{"type" => type, "first" => %{"id" => id}})
+       when is_binary(id) and type in ["Collection", "OrderedCollection"] do
+    fetch_page_items(id)
+  end
+
+  # A document with neither inlined items nor a usable `first` link yields no items.
+  defp objects_from_collection(_page), do: []
+
+  # Walks the page chain starting at `id`, appending each page's items to `items`.
+  # Stops once :max_collection_objects items are collected, a page is empty, a page
+  # cannot be fetched, or there is no `next` link.
+  defp fetch_page_items(id, items \\ []) do
+    if length(items) >= Config.get([:activitypub, :max_collection_objects]) do
+      items
+    else
+      case Fetcher.fetch_and_contain_remote_object_from_id(id) do
+        {:ok, page} ->
+          case items_in_page(page) do
+            [] -> items
+            objects -> maybe_next_page(page, items ++ objects)
+          end
+
+        e ->
+          # Keep what was gathered so far rather than raising a MatchError mid-walk.
+          Logger.error("Could not fetch collection page #{id} - #{inspect(e)}")
+          items
+      end
+    end
+  end
+
+  defp maybe_next_page(%{"next" => id}, items) when is_binary(id) do
+    fetch_page_items(id, items)
+  end
+
+  defp maybe_next_page(_, items), do: items
+end
diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex
index e6548a818..77f38f9f1 100644
--- a/lib/pleroma/web/activity_pub/activity_pub.ex
+++ b/lib/pleroma/web/activity_pub/activity_pub.ex
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: AGPL-3.0-only
 
 defmodule Pleroma.Web.ActivityPub.ActivityPub do
+  alias Akkoma.Collections
   alias Pleroma.Activity
   alias Pleroma.Activity.Ir.Topics
   alias Pleroma.Config
@@ -1663,10 +1664,31 @@ def maybe_handle_clashing_nickname(data) do
   end
 
   def pin_data_from_featured_collection(%{
-        "type" => type,
-        "orderedItems" => objects
-      })
+        "type" => "OrderedCollection",
+        "first" => first
+      })
+      when is_binary(first) do
+    # `first` is a page id here; an embedded page map falls through to the clause below.
+    with {:ok, page} <- Fetcher.fetch_and_contain_remote_object_from_id(first) do
+      page
+      |> Map.get("orderedItems")
+      |> List.wrap()
+      |> Map.new(fn %{"id" => object_ap_id} -> {object_ap_id, NaiveDateTime.utc_now()} end)
+    else
+      e ->
+        Logger.error("Could not decode featured collection at fetch #{first}, #{inspect(e)}")
+        # Return a bare map, the same shape as the success path and the clause below.
+        %{}
+    end
+  end
+
+  def pin_data_from_featured_collection(
+        %{
+          "type" => type
+        } = collection
+      )
       when type in ["OrderedCollection", "Collection"] do
+    {:ok, objects} = Collections.Fetcher.fetch_collection(collection)
     Map.new(objects, fn %{"id" => object_ap_id} -> {object_ap_id, NaiveDateTime.utc_now()} end)
   end
 
diff --git a/test/fixtures/collections/ordered_array.json b/test/fixtures/collections/ordered_array.json
new file mode 100644
index 000000000..1f196c6d3
--- /dev/null
+++
b/test/fixtures/collections/ordered_array.json @@ -0,0 +1,19 @@ +{ + "@context": "https://www.w3.org/ns/activitystreams", + "id": "https://example.com/collection/ordered_array", + "summary": "Object history", + "type": "OrderedCollection", + "totalItems": 2, + "orderedItems": [ + { + "type": "Create", + "actor": "http://www.test.example/sally", + "object": "http://example.org/foo" + }, + { + "type": "Like", + "actor": "http://www.test.example/joe", + "object": "http://example.org/foo" + } + ] +} diff --git a/test/fixtures/collections/unordered_array.json b/test/fixtures/collections/unordered_array.json new file mode 100644 index 000000000..05d9f8175 --- /dev/null +++ b/test/fixtures/collections/unordered_array.json @@ -0,0 +1,19 @@ +{ + "@context": "https://www.w3.org/ns/activitystreams", + "id": "https://example.com/collection/unordered_array", + "summary": "Object history", + "type": "Collection", + "totalItems": 2, + "items": [ + { + "type": "Create", + "actor": "http://www.test.example/sally", + "object": "http://example.org/foo" + }, + { + "type": "Like", + "actor": "http://www.test.example/joe", + "object": "http://example.org/foo" + } + ] +} diff --git a/test/fixtures/collections/unordered_page_embedded.json b/test/fixtures/collections/unordered_page_embedded.json new file mode 100644 index 000000000..01f9230a4 --- /dev/null +++ b/test/fixtures/collections/unordered_page_embedded.json @@ -0,0 +1,20 @@ +{ + "@context": "https://www.w3.org/ns/activitystreams", + "summary": "Sally's recent activities", + "type": "Collection", + "id": "http://example.org/foo", + "totalItems": 10, + "first": { + "type": "CollectionPage", + "id": "http://example.org/foo?page=1", + "partOf": "http://example.org/foo", + "next": "http://example.org/foo?page=2", + "items": [ + { + "type": "Create", + "actor": "http://www.test.example/sally", + "object": "http://example.org/foo" + } + ] + } +} diff --git a/test/fixtures/collections/unordered_page_first.json 
b/test/fixtures/collections/unordered_page_first.json
new file mode 100644
index 000000000..f6d54f352
--- /dev/null
+++ b/test/fixtures/collections/unordered_page_first.json
@@ -0,0 +1,13 @@
+{
+  "type": "CollectionPage",
+  "id": "https://example.com/collection/unordered_page_reference?page=1",
+  "partOf": "https://example.com/collection/unordered_page_reference",
+  "next": "https://example.com/collection/unordered_page_reference?page=2",
+  "items": [
+    {
+      "type": "Create",
+      "actor": "http://www.test.example/sally",
+      "object": "http://example.org/foo"
+    }
+  ]
+}
diff --git a/test/fixtures/collections/unordered_page_reference.json b/test/fixtures/collections/unordered_page_reference.json
new file mode 100644
index 000000000..7376e4f22
--- /dev/null
+++ b/test/fixtures/collections/unordered_page_reference.json
@@ -0,0 +1,8 @@
+{
+  "@context": "https://www.w3.org/ns/activitystreams",
+  "summary": "Sally's recent activities",
+  "type": "Collection",
+  "id": "https://example.com/collection/unordered_page_reference",
+  "totalItems": 10,
+  "first": "https://example.com/collection/unordered_page_reference?page=1"
+}
diff --git a/test/fixtures/collections/unordered_page_second.json b/test/fixtures/collections/unordered_page_second.json
new file mode 100644
index 000000000..ee557cb56
--- /dev/null
+++ b/test/fixtures/collections/unordered_page_second.json
@@ -0,0 +1,12 @@
+{
+  "type": "CollectionPage",
+  "id": "https://example.com/collection/unordered_page_reference?page=2",
+  "partOf": "https://example.com/collection/unordered_page_reference",
+  "items": [
+    {
+      "type": "Like",
+      "actor": "http://www.test.example/sally",
+      "object": "http://example.org/foo"
+    }
+  ]
+}
diff --git a/test/fixtures/friendica/friendica_featured_collection.json b/test/fixtures/friendica/friendica_featured_collection.json
new file mode 100644
index 000000000..f640975f3
--- /dev/null
+++ b/test/fixtures/friendica/friendica_featured_collection.json
@@ -0,0 +1,29 @@
+{
+  "@context": [
+    "https://www.w3.org/ns/activitystreams",
+    "https://w3id.org/security/v1",
+    {
+      "vcard": "http://www.w3.org/2006/vcard/ns#",
+      "dfrn": "http://purl.org/macgirvin/dfrn/1.0/",
+      "diaspora": "https://diasporafoundation.org/ns/",
+      "litepub": "http://litepub.social/ns#",
+      "toot": "http://joinmastodon.org/ns#",
+      "featured": {
+        "@id": "toot:featured",
+        "@type": "@id"
+      },
+      "schema": "http://schema.org#",
+      "manuallyApprovesFollowers": "as:manuallyApprovesFollowers",
+      "sensitive": "as:sensitive",
+      "Hashtag": "as:Hashtag",
+      "directMessage": "litepub:directMessage",
+      "discoverable": "toot:discoverable",
+      "PropertyValue": "schema:PropertyValue",
+      "value": "schema:value"
+    }
+  ],
+  "id": "https://friendica.example.com/featured/raha",
+  "type": "OrderedCollection",
+  "totalItems": 0,
+  "first": "https://friendica.example.com/featured/raha?page=1"
+}
diff --git a/test/fixtures/friendica/friendica_featured_collection_first.json b/test/fixtures/friendica/friendica_featured_collection_first.json
new file mode 100644
index 000000000..1f9dce420
--- /dev/null
+++ b/test/fixtures/friendica/friendica_featured_collection_first.json
@@ -0,0 +1,34 @@
+{
+  "@context": [
+    "https://www.w3.org/ns/activitystreams",
+    "https://w3id.org/security/v1",
+    {
+      "vcard": "http://www.w3.org/2006/vcard/ns#",
+      "dfrn": "http://purl.org/macgirvin/dfrn/1.0/",
+      "diaspora": "https://diasporafoundation.org/ns/",
+      "litepub": "http://litepub.social/ns#",
+      "toot": "http://joinmastodon.org/ns#",
+      "featured": {
+        "@id": "toot:featured",
+        "@type": "@id"
+      },
+      "schema": "http://schema.org#",
+      "manuallyApprovesFollowers": "as:manuallyApprovesFollowers",
+      "sensitive": "as:sensitive",
+      "Hashtag": "as:Hashtag",
+      "directMessage": "litepub:directMessage",
+      "discoverable": "toot:discoverable",
+      "PropertyValue": "schema:PropertyValue",
+      "value": "schema:value"
+    }
+  ],
+  "id": "https://friendica.example.com/featured/raha?page=1",
+  "type": "OrderedCollectionPage",
+  "totalItems": 0,
+  "partOf": "https://friendica.example.com/featured/raha",
+  "orderedItems": [
+    {
+      "id": "http://inserted"
+    }
+  ]
+}
diff --git a/test/pleroma/collections/collections_fetcher_test.exs b/test/pleroma/collections/collections_fetcher_test.exs
new file mode 100644
index 000000000..b9f84f5c4
--- /dev/null
+++ b/test/pleroma/collections/collections_fetcher_test.exs
@@ -0,0 +1,72 @@
+defmodule Akkoma.Collections.FetcherTest do
+  use Pleroma.DataCase
+  use Oban.Testing, repo: Pleroma.Repo
+
+  alias Akkoma.Collections.Fetcher
+
+  import Tesla.Mock
+
+  @fixture_dir "test/fixtures/collections"
+  @paged_id "https://example.com/collection/unordered_page_reference"
+
+  setup do
+    mock(fn env -> apply(HttpRequestMock, :request, [env]) end)
+    :ok
+  end
+
+  # Reads a collection fixture by file name.
+  defp fixture(name), do: File.read!(Path.join(@fixture_dir, name))
+
+  # Serves every `url => body` pair in `pages` as a 200 activity+json GET response.
+  # Any other request matches no clause and raises.
+  defp mock_activity_json(pages) do
+    mock(fn %{method: :get, url: url} when is_map_key(pages, url) ->
+      %Tesla.Env{
+        status: 200,
+        body: Map.fetch!(pages, url),
+        headers: [{"content-type", "application/activity+json"}]
+      }
+    end)
+  end
+
+  # Mocks the two-page collection shared by the paging tests.
+  defp mock_paged_collection do
+    mock_activity_json(%{
+      @paged_id => fixture("unordered_page_reference.json"),
+      (@paged_id <> "?page=1") => fixture("unordered_page_first.json"),
+      (@paged_id <> "?page=2") => fixture("unordered_page_second.json")
+    })
+  end
+
+  test "it should extract items from an embedded array in a Collection" do
+    # Serve the fixture from the URL that matches its own "id".
+    ap_id = "https://example.com/collection/unordered_array"
+    mock_activity_json(%{ap_id => fixture("unordered_array.json")})
+
+    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(ap_id)
+    assert [%{"type" => "Create"}, %{"type" => "Like"}] = objects
+  end
+
+  test "it should extract items from an embedded array in an OrderedCollection" do
+    ap_id = "https://example.com/collection/ordered_array"
+    mock_activity_json(%{ap_id => fixture("ordered_array.json")})
+
+    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(ap_id)
+    assert [%{"type" => "Create"}, %{"type" => "Like"}] = objects
+  end
+
+  test "it should extract items from an referenced first page in a Collection" do
+    mock_paged_collection()
+
+    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(@paged_id)
+    assert [%{"type" => "Create"}, %{"type" => "Like"}] = objects
+  end
+
+  test "it should stop fetching when we hit :max_collection_objects" do
+    clear_config([:activitypub, :max_collection_objects], 1)
+    mock_paged_collection()
+
+    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(@paged_id)
+    assert [%{"type" => "Create"}] = objects
+  end
+end
diff --git a/test/pleroma/web/activity_pub/activity_pub_test.exs b/test/pleroma/web/activity_pub/activity_pub_test.exs
index 574ef0d71..2b65f59e0 100644
--- a/test/pleroma/web/activity_pub/activity_pub_test.exs
+++ b/test/pleroma/web/activity_pub/activity_pub_test.exs
@@ -314,6 +314,44 @@ test "fetches user featured collection" do
     end
   end
 
+  test "fetches user featured collection using the first property" do
+    featured_url = "https://friendica.example.com/raha/collections/featured"
+    first_url = "https://friendica.example.com/featured/raha?page=1"
+
+    featured_data =
+      "test/fixtures/friendica/friendica_featured_collection.json"
+      |> File.read!()
+
+    page_data =
+      "test/fixtures/friendica/friendica_featured_collection_first.json"
+      |> File.read!()
+
+    Tesla.Mock.mock(fn
+      %{
+        method: :get,
+        url: ^featured_url
+      } ->
+        %Tesla.Env{
+          status: 200,
+          body: featured_data,
+          headers: [{"content-type", "application/activity+json"}]
+        }
+
+      %{
+        method: :get,
+        url: ^first_url
+      } ->
+        %Tesla.Env{
+          status: 200,
+          body: page_data,
+          headers: [{"content-type", "application/activity+json"}]
+        }
+    end)
+
+    {:ok, data} = ActivityPub.fetch_and_prepare_featured_from_ap_id(featured_url)
+    assert Map.has_key?(data, "http://inserted")
+  end
+
   test "it fetches the appropriate tag-restricted posts" do
     user = insert(:user)