From 05081cd81be675db4825335181182dd110c4b834 Mon Sep 17 00:00:00 2001
From: FloatingGhost
Date: Sun, 3 Jul 2022 19:20:59 +0100
Subject: [PATCH] Add collection fetching module

---
 config/config.exs                             |   3 +-
 lib/pleroma/collections/fetcher.ex            | 100 +++++++
 test/fixtures/collections/ordered_array.json  |  19 ++
 .../fixtures/collections/unordered_array.json |  19 ++
 .../collections/unordered_page_embedded.json  |  20 +++
 .../collections/unordered_page_first.json     |  13 ++
 .../collections/unordered_page_reference.json |   8 +
 .../collections/unordered_page_second.json    |  12 ++
 .../collections/collections_fetcher_test.exs  | 167 ++++++++++++++++++
 9 files changed, 360 insertions(+), 1 deletion(-)
 create mode 100644 lib/pleroma/collections/fetcher.ex
 create mode 100644 test/fixtures/collections/ordered_array.json
 create mode 100644 test/fixtures/collections/unordered_array.json
 create mode 100644 test/fixtures/collections/unordered_page_embedded.json
 create mode 100644 test/fixtures/collections/unordered_page_first.json
 create mode 100644 test/fixtures/collections/unordered_page_reference.json
 create mode 100644 test/fixtures/collections/unordered_page_second.json
 create mode 100644 test/pleroma/collections/collections_fetcher_test.exs

diff --git a/config/config.exs b/config/config.exs
index eb39155df..914b5db61 100644
--- a/config/config.exs
+++ b/config/config.exs
@@ -363,7 +363,8 @@ config :pleroma, :activitypub,
   follow_handshake_timeout: 500,
   note_replies_output_limit: 5,
   sign_object_fetches: true,
-  authorized_fetch_mode: false
+  authorized_fetch_mode: false,
+  max_collection_objects: 50
 
 config :pleroma, :streamer,
   workers: 3,
diff --git a/lib/pleroma/collections/fetcher.ex b/lib/pleroma/collections/fetcher.ex
new file mode 100644
index 000000000..205c62b4e
--- /dev/null
+++ b/lib/pleroma/collections/fetcher.ex
@@ -0,0 +1,100 @@
+# Akkoma: The cooler fediverse server
+# Copyright © 2022- Akkoma Authors
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Akkoma.Collections.Fetcher do
+  @moduledoc """
+  ActivityPub collection fetching functions.
+
+  Resolves a remote `Collection` / `OrderedCollection` into a flat list of its
+  items, following `first` / `next` page links when the items are not inlined.
+
+  See: https://www.w3.org/TR/activitystreams-core/#paging
+  """
+  alias Pleroma.Config
+  alias Pleroma.Object.Fetcher
+
+  require Logger
+
+  @collection_types ["Collection", "OrderedCollection"]
+
+  @doc """
+  Fetches the collection at `ap_id` and returns `{:ok, items}`.
+
+  Items inlined in the collection (or in an embedded `first` page) are used
+  directly; otherwise pages are fetched one at a time until a page has no
+  items, there is no `next` link, or at least
+  `[:activitypub, :max_collection_objects]` items have been gathered.
+
+  If the collection itself cannot be fetched, the fetcher's result is returned
+  unchanged. If a later page cannot be fetched, the items gathered so far are
+  returned. A document that is not a recognisable collection yields `{:ok, []}`.
+  """
+  def fetch_collection_by_ap_id(ap_id) when is_binary(ap_id) do
+    with {:ok, collection} <- Fetcher.fetch_and_contain_remote_object_from_id(ap_id) do
+      {:ok, objects_from_collection(collection)}
+    end
+  end
+
+  # Items carried by a single page; anything that is not a page with a list of
+  # items counts as empty so that malformed remote data cannot crash the fetch.
+  defp items_in_page(%{"type" => type, "orderedItems" => items})
+       when is_list(items) and type in ["OrderedCollection", "OrderedCollectionPage"],
+       do: items
+
+  defp items_in_page(%{"type" => type, "items" => items})
+       when is_list(items) and type in ["Collection", "CollectionPage"],
+       do: items
+
+  defp items_in_page(_page), do: []
+
+  # Inlined items win; otherwise start paging from `first`.
+  defp objects_from_collection(%{"type" => "OrderedCollection", "orderedItems" => items})
+       when is_list(items),
+       do: items
+
+  defp objects_from_collection(%{"type" => "Collection", "items" => items}) when is_list(items),
+    do: items
+
+  defp objects_from_collection(%{"type" => type, "first" => first})
+       when is_binary(first) and type in @collection_types,
+       do: fetch_page_items(first)
+
+  # `first` is an embedded page: use the items it already carries instead of
+  # fetching it again, and only fall back to its id when it carries none.
+  defp objects_from_collection(%{"type" => type, "first" => %{} = first})
+       when type in @collection_types do
+    case {items_in_page(first), first} do
+      {[], %{"id" => id}} when is_binary(id) -> fetch_page_items(id)
+      {[], _first} -> []
+      {items, _first} -> maybe_next_page(first, items)
+    end
+  end
+
+  defp objects_from_collection(_collection), do: []
+
+  defp fetch_page_items(id, items \\ []) do
+    if length(items) >= Config.get([:activitypub, :max_collection_objects]) do
+      items
+    else
+      case Fetcher.fetch_and_contain_remote_object_from_id(id) do
+        {:ok, page} ->
+          case items_in_page(page) do
+            [] -> items
+            objects -> maybe_next_page(page, items ++ objects)
+          end
+
+        error ->
+          # A broken page must not discard (or crash on) what we already have.
+          Logger.error("Could not fetch collection page #{id} - #{inspect(error)}")
+          items
+      end
+    end
+  end
+
+  defp maybe_next_page(%{"next" => id}, items) when is_binary(id) do
+    fetch_page_items(id, items)
+  end
+
+  defp maybe_next_page(_page, items), do: items
+end
diff --git a/test/fixtures/collections/ordered_array.json b/test/fixtures/collections/ordered_array.json
new file mode
100644 index 000000000..1f196c6d3 --- /dev/null +++ b/test/fixtures/collections/ordered_array.json @@ -0,0 +1,19 @@ +{ + "@context": "https://www.w3.org/ns/activitystreams", + "id": "https://example.com/collection/ordered_array", + "summary": "Object history", + "type": "OrderedCollection", + "totalItems": 2, + "orderedItems": [ + { + "type": "Create", + "actor": "http://www.test.example/sally", + "object": "http://example.org/foo" + }, + { + "type": "Like", + "actor": "http://www.test.example/joe", + "object": "http://example.org/foo" + } + ] +} diff --git a/test/fixtures/collections/unordered_array.json b/test/fixtures/collections/unordered_array.json new file mode 100644 index 000000000..05d9f8175 --- /dev/null +++ b/test/fixtures/collections/unordered_array.json @@ -0,0 +1,19 @@ +{ + "@context": "https://www.w3.org/ns/activitystreams", + "id": "https://example.com/collection/unordered_array", + "summary": "Object history", + "type": "Collection", + "totalItems": 2, + "items": [ + { + "type": "Create", + "actor": "http://www.test.example/sally", + "object": "http://example.org/foo" + }, + { + "type": "Like", + "actor": "http://www.test.example/joe", + "object": "http://example.org/foo" + } + ] +} diff --git a/test/fixtures/collections/unordered_page_embedded.json b/test/fixtures/collections/unordered_page_embedded.json new file mode 100644 index 000000000..01f9230a4 --- /dev/null +++ b/test/fixtures/collections/unordered_page_embedded.json @@ -0,0 +1,20 @@ +{ + "@context": "https://www.w3.org/ns/activitystreams", + "summary": "Sally's recent activities", + "type": "Collection", + "id": "http://example.org/foo", + "totalItems": 10, + "first": { + "type": "CollectionPage", + "id": "http://example.org/foo?page=1", + "partOf": "http://example.org/foo", + "next": "http://example.org/foo?page=2", + "items": [ + { + "type": "Create", + "actor": "http://www.test.example/sally", + "object": "http://example.org/foo" + } + ] + } +} diff --git 
a/test/fixtures/collections/unordered_page_first.json b/test/fixtures/collections/unordered_page_first.json new file mode 100644 index 000000000..f6d54f352 --- /dev/null +++ b/test/fixtures/collections/unordered_page_first.json @@ -0,0 +1,13 @@ +{ + "type": "CollectionPage", + "id": "https://example.com/collection/unordered_page_reference?page=1", + "partOf": "https://example.com/collection/unordered_page_reference", + "next": "https://example.com/collection/unordered_page_reference?page=2", + "items": [ + { + "type": "Create", + "actor": "http://www.test.example/sally", + "object": "http://example.org/foo" + } + ] +} diff --git a/test/fixtures/collections/unordered_page_reference.json b/test/fixtures/collections/unordered_page_reference.json new file mode 100644 index 000000000..7376e4f22 --- /dev/null +++ b/test/fixtures/collections/unordered_page_reference.json @@ -0,0 +1,8 @@ +{ + "@context": "https://www.w3.org/ns/activitystreams", + "summary": "Sally's recent activities", + "type": "Collection", + "id": "https://example.com/collection/unordered_page_reference", + "totalItems": 10, + "first": "https://example.com/collection/unordered_page_reference?page=1" +} diff --git a/test/fixtures/collections/unordered_page_second.json b/test/fixtures/collections/unordered_page_second.json new file mode 100644 index 000000000..ee557cb56 --- /dev/null +++ b/test/fixtures/collections/unordered_page_second.json @@ -0,0 +1,12 @@ +{ + "type": "CollectionPage", + "id": "https://example.com/collection/unordered_page_reference?page=2", + "partOf": "https://example.com/collection/unordered_page_reference", + "items": [ + { + "type": "Like", + "actor": "http://www.test.example/sally", + "object": "http://example.org/foo" + } + ] +} diff --git a/test/pleroma/collections/collections_fetcher_test.exs b/test/pleroma/collections/collections_fetcher_test.exs new file mode 100644 index 000000000..b9f84f5c4 --- /dev/null +++ b/test/pleroma/collections/collections_fetcher_test.exs @@ -0,0 
+1,77 @@
+defmodule Akkoma.Collections.FetcherTest do
+  use Pleroma.DataCase
+  use Oban.Testing, repo: Pleroma.Repo
+
+  alias Akkoma.Collections.Fetcher
+
+  import Tesla.Mock
+
+  @fixtures "test/fixtures/collections"
+
+  setup do
+    mock(fn env -> apply(HttpRequestMock, :request, [env]) end)
+    :ok
+  end
+
+  # Reads a fixture from the collections fixture directory.
+  defp fixture(name), do: File.read!(Path.join(@fixtures, name))
+
+  # Serves each `url => body` pair in `responses` as a 200 ActivityPub JSON
+  # response to GET requests; any other request fails the test.
+  defp mock_collection(responses) do
+    mock(fn %{method: :get, url: url} ->
+      %Tesla.Env{
+        status: 200,
+        body: Map.fetch!(responses, url),
+        headers: [{"content-type", "application/activity+json"}]
+      }
+    end)
+  end
+
+  # Serves the paged collection fixture with both of its pages and returns the
+  # collection's id.
+  defp mock_paged_collection do
+    ap_id = "https://example.com/collection/unordered_page_reference"
+
+    mock_collection(%{
+      ap_id => fixture("unordered_page_reference.json"),
+      "#{ap_id}?page=1" => fixture("unordered_page_first.json"),
+      "#{ap_id}?page=2" => fixture("unordered_page_second.json")
+    })
+
+    ap_id
+  end
+
+  test "it should extract items from an embedded array in a Collection" do
+    # The URL has to match the fixture being served; this used to point at the
+    # ordered fixture's id.
+    ap_id = "https://example.com/collection/unordered_array"
+    mock_collection(%{ap_id => fixture("unordered_array.json")})
+
+    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(ap_id)
+    assert [%{"type" => "Create"}, %{"type" => "Like"}] = objects
+  end
+
+  test "it should extract items from an embedded array in an OrderedCollection" do
+    ap_id = "https://example.com/collection/ordered_array"
+    mock_collection(%{ap_id => fixture("ordered_array.json")})
+
+    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(ap_id)
+    assert [%{"type" => "Create"}, %{"type" => "Like"}] = objects
+  end
+
+  test "it should extract items from an referenced first page in a Collection" do
+    ap_id = mock_paged_collection()
+
+    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(ap_id)
+    assert [%{"type" => "Create"}, %{"type" => "Like"}] = objects
+  end
+
+  test "it should stop fetching when we hit :max_collection_objects" do
+    clear_config([:activitypub, :max_collection_objects], 1)
+    ap_id = mock_paged_collection()
+
+    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(ap_id)
+    assert [%{"type" => "Create"}] = objects
+  end
+end