rip out fetch_initial_posts

Every time someone tries to use it, it goes mad and tries to scrape the
entire fediverse for no visible reason. It's better to just remove it
than to continue shipping it in its current state.

idea acked by lain and feld on irc

Closes  
This commit is contained in:
rinpatch 2020-03-14 15:39:58 +03:00
parent 14ebf8f1e5
commit e87a32bcd7
9 changed files with 21 additions and 170 deletions

View file

@ -504,10 +504,6 @@
federator_outgoing: 5
]
# Settings for fetching a remote user's posts when that user is first federated with.
# `enabled` switches the feature on or off; `pages` is the number of pages to fetch.
config :pleroma, :fetch_initial_posts,
enabled: false,
pages: 5
config :auto_linker,
opts: [
extra: true,

View file

@ -2007,25 +2007,6 @@
}
]
},
%{
group: :pleroma,
key: :fetch_initial_posts,
type: :group,
description: "Fetching initial posts settings",
children: [
%{
key: :enabled,
type: :boolean,
description: "Fetch posts when a new user is federated with"
},
%{
key: :pages,
type: :integer,
description: "The amount of pages to fetch",
suggestions: [5]
}
]
},
%{
group: :auto_linker,
key: :opts,

View file

@ -151,14 +151,6 @@ config :pleroma, :mrf_user_allowlist,
* `sign_object_fetches`: Sign object fetches with HTTP signatures
* `authorized_fetch_mode`: Require HTTP signatures for AP fetches
### :fetch_initial_posts
!!! warning
Be careful with this setting, fetching posts may lead to new users being discovered whose posts will then also be fetched. This can lead to serious load on your instance and database.
* `enabled`: If enabled, when a new user is discovered by your instance, fetch some of their latest posts.
* `pages`: The number of pages to fetch
## Pleroma.ScheduledActivity
* `daily_user_limit`: the number of scheduled activities a user is allowed to create in a single day (Default: `25`)

View file

@ -839,10 +839,6 @@ def get_or_fetch_by_nickname(nickname) do
_e ->
with [_nick, _domain] <- String.split(nickname, "@"),
{:ok, user} <- fetch_by_nickname(nickname) do
if Pleroma.Config.get([:fetch_initial_posts, :enabled]) do
fetch_initial_posts(user)
end
{:ok, user}
else
_e -> {:error, "not found " <> nickname}
@ -850,11 +846,6 @@ def get_or_fetch_by_nickname(nickname) do
end
end
@doc """
Enqueues a "fetch_initial_posts" background job for a user that has just been federated with.
"""
def fetch_initial_posts(user) do
  job_params = %{"user_id" => user.id}
  BackgroundWorker.enqueue("fetch_initial_posts", job_params)
end
@spec get_followers_query(User.t(), pos_integer() | nil) :: Ecto.Query.t()
def get_followers_query(%User{} = user, nil) do
User.Query.build(%{followers: user, deactivated: false})
@ -1320,16 +1311,6 @@ def perform(:delete, %User{} = user) do
Repo.delete(user)
end
# Fetches the configured number of pages from the user's outbox and hands every
# fetched item to the federator.
#
# `Utils.fetch_ordered_collection/2` returns a non-list value (e.g. an error tuple)
# when fetching or decoding fails. Such a value is returned unchanged instead of being
# piped into `Enum.reverse/1`, which would raise on a non-enumerable.
def perform(:fetch_initial_posts, %User{} = user) do
  pages = Pleroma.Config.get!([:fetch_initial_posts, :pages])

  with items when is_list(items) <-
         Utils.fetch_ordered_collection(user.source_data["outbox"], pages) do
    # Insert all the posts in reverse order, so they're in the right order on the timeline
    items
    |> Enum.reverse()
    |> Enum.each(&Pleroma.Web.Federator.incoming_ap_doc/1)
  end
end
# Handles the `:deactivate_async` operation by delegating to `deactivate/2`.
def perform(:deactivate_async, user, status) do
  deactivate(user, status)
end
@spec perform(atom(), User.t(), list()) :: list() | {:error, any()}
@ -1458,18 +1439,7 @@ def get_or_fetch_by_ap_id(ap_id) do
if !is_nil(user) and !needs_update?(user) do
{:ok, user}
else
# Whether to fetch initial posts for the user (if it's a new user & the fetching is enabled)
should_fetch_initial = is_nil(user) and Pleroma.Config.get([:fetch_initial_posts, :enabled])
resp = fetch_by_ap_id(ap_id)
if should_fetch_initial do
with {:ok, %User{} = user} <- resp do
fetch_initial_posts(user)
end
end
resp
fetch_by_ap_id(ap_id)
end
end

View file

@ -784,45 +784,6 @@ defp build_flag_object(act) when is_map(act) or is_binary(act) do
defp build_flag_object(_), do: []
@doc """
Fetches the OrderedCollection/OrderedCollectionPage from `from`, following `next` links for at most
`pages_left` additional pages after the first one.

Returns the collected `orderedItems`, appended to `acc` in the order the pages were fetched.
If the collection has fewer pages than requested, it returns whatever was there.

A page without `orderedItems` contributes no items. If the request or the JSON decoding does not
succeed, that result is returned as-is. A document that is neither an OrderedCollection nor an
OrderedCollectionPage yields `{:error, reason}`.
"""
def fetch_ordered_collection(from, pages_left, acc \\ []) do
  with {:ok, response} <- Tesla.get(from),
       {:ok, collection} <- Jason.decode(response.body) do
    collect_ordered_items(collection, pages_left, acc)
  end
end

# A bare OrderedCollection only points at its first page: continue there, keeping the accumulator.
defp collect_ordered_items(%{"type" => "OrderedCollection", "first" => first}, pages_left, acc)
     when is_binary(first) do
  fetch_ordered_collection(first, pages_left, acc)
end

# An OrderedCollection whose `first` is missing or is not a URL string cannot be followed.
defp collect_ordered_items(%{"type" => "OrderedCollection"}, _pages_left, _acc) do
  {:error, "OrderedCollection has no fetchable first page"}
end

# A page: take its items, then follow `next` only while the page budget allows it.
defp collect_ordered_items(%{"type" => "OrderedCollectionPage"} = page, pages_left, acc) do
  # `orderedItems` may be absent; List.wrap/1 turns nil into [] so `++` never
  # produces nil or an improper list.
  items = acc ++ List.wrap(page["orderedItems"])

  case page do
    %{"next" => next} when is_binary(next) and pages_left > 0 ->
      # There are still more pages, go deeper saving what we have into the accumulator
      fetch_ordered_collection(next, pages_left - 1, items)

    _ ->
      # Either the page budget is used up or there is no next page
      items
  end
end

defp collect_ordered_items(_collection, _pages_left, _acc) do
  {:error, "Not an OrderedCollection or OrderedCollectionPage"}
end
#### Report-related helpers
def get_reports(params, page, page_size) do
params =

View file

@ -10,10 +10,6 @@ defmodule Pleroma.Workers.BackgroundWorker do
use Pleroma.Workers.WorkerHelper, queue: "background"
@impl Oban.Worker
# Handles the "fetch_initial_posts" op: looks the user up by `user_id` and
# delegates to `User.perform/2`.
def perform(%{"op" => "fetch_initial_posts", "user_id" => user_id}, _job) do
  User.perform(:fetch_initial_posts, User.get_cached_by_id(user_id))
end
def perform(%{"op" => "deactivate_user", "user_id" => user_id, "status" => status}, _job) do
user = User.get_cached_by_id(user_id)

View file

@ -0,0 +1,10 @@
defmodule Pleroma.Repo.Migrations.ConfigRemoveFetchInitialPosts do
  use Ecto.Migration

  # Deletes the stored `:fetch_initial_posts` setting of the `:pleroma` group from the config table.
  @delete_setting_sql "delete from config where config.key = ':fetch_initial_posts' and config.group = ':pleroma';"

  def change do
    # The second argument to execute/2 is the statement for the rollback direction; it is empty here.
    execute(@delete_setting_sql, "")
  end
end

View file

@ -0,0 +1,10 @@
defmodule Pleroma.Repo.Migrations.DeleteFetchInitialPostsJobs do
  use Ecto.Migration

  # Deletes BackgroundWorker jobs whose "op" argument is "fetch_initial_posts" from oban_jobs.
  @delete_jobs_sql "delete from oban_jobs where worker = 'Pleroma.Workers.BackgroundWorker' and args->>'op' = 'fetch_initial_posts';"

  def change do
    # The second argument to execute/2 is the statement for the rollback direction; it is empty here.
    execute(@delete_jobs_sql, "")
  end
end

View file

@ -177,71 +177,6 @@ test "does not adress actor's follower address if the activity is not public", %
end
end
describe "fetch_ordered_collection" do
  import Tesla.Mock

  test "fetches the first OrderedCollectionPage when an OrderedCollection is encountered" do
    outbox_url = "http://mastodon.com/outbox"
    first_page_url = "http://mastodon.com/outbox?page=true"

    # The outbox itself is only a pointer to its first page.
    mock(fn
      %{method: :get, url: ^outbox_url} ->
        json(%{"type" => "OrderedCollection", "first" => first_page_url})

      %{method: :get, url: ^first_page_url} ->
        json(%{"type" => "OrderedCollectionPage", "orderedItems" => ["ok"]})
    end)

    assert Utils.fetch_ordered_collection(outbox_url, 1) == ["ok"]
  end

  test "fetches several pages in the right order one after another, but only the specified amount" do
    # Three chained pages, keyed by the URL they are served from.
    responses = %{
      "http://example.com/outbox" => %{
        "type" => "OrderedCollectionPage",
        "orderedItems" => [0],
        "next" => "http://example.com/outbox?page=1"
      },
      "http://example.com/outbox?page=1" => %{
        "type" => "OrderedCollectionPage",
        "orderedItems" => [1],
        "next" => "http://example.com/outbox?page=2"
      },
      "http://example.com/outbox?page=2" => %{
        "type" => "OrderedCollectionPage",
        "orderedItems" => [2]
      }
    }

    mock(fn %{method: :get, url: url} -> json(Map.fetch!(responses, url)) end)

    assert Utils.fetch_ordered_collection("http://example.com/outbox", 0) == [0]
    assert Utils.fetch_ordered_collection("http://example.com/outbox", 1) == [0, 1]
  end

  test "returns an error if the url doesn't have an OrderedCollection/Page" do
    not_an_outbox_url = "http://example.com/not-an-outbox"

    mock(fn %{method: :get, url: ^not_an_outbox_url} ->
      json(%{"type" => "NotAnOutbox"})
    end)

    assert {:error, _} = Utils.fetch_ordered_collection(not_an_outbox_url, 1)
  end

  test "returns the what was collected if there are less pages than specified" do
    # Only two pages exist although five extra pages are requested.
    responses = %{
      "http://example.com/outbox" => %{
        "type" => "OrderedCollectionPage",
        "orderedItems" => [0],
        "next" => "http://example.com/outbox?page=1"
      },
      "http://example.com/outbox?page=1" => %{
        "type" => "OrderedCollectionPage",
        "orderedItems" => [1]
      }
    }

    mock(fn %{method: :get, url: url} -> json(Map.fetch!(responses, url)) end)

    assert Utils.fetch_ordered_collection("http://example.com/outbox", 5) == [0, 1]
  end
end
test "make_json_ld_header/0" do
assert Utils.make_json_ld_header() == %{
"@context" => [