Add periodic pruning of old data

This commit is contained in:
FloatingGhost 2022-12-01 12:53:04 +00:00
parent 5bc9abff46
commit e747f164d4
8 changed files with 159 additions and 12 deletions

View file

@ -569,7 +569,8 @@
new_users_digest: 1, new_users_digest: 1,
mute_expire: 5, mute_expire: 5,
search_indexing: 10, search_indexing: 10,
nodeinfo_fetcher: 1 nodeinfo_fetcher: 1,
database_prune: 1
], ],
plugins: [ plugins: [
Oban.Plugins.Pruner, Oban.Plugins.Pruner,
@ -577,7 +578,8 @@
], ],
crontab: [ crontab: [
{"0 0 * * 0", Pleroma.Workers.Cron.DigestEmailsWorker}, {"0 0 * * 0", Pleroma.Workers.Cron.DigestEmailsWorker},
{"0 0 * * *", Pleroma.Workers.Cron.NewUsersDigestWorker} {"0 0 * * *", Pleroma.Workers.Cron.NewUsersDigestWorker},
{"0 3 * * *", Pleroma.Workers.Cron.PruneDatabaseWorker}
] ]
config :pleroma, :workers, config :pleroma, :workers,
@ -605,7 +607,8 @@
new_users_digest: :timer.seconds(10), new_users_digest: :timer.seconds(10),
mute_expire: :timer.seconds(5), mute_expire: :timer.seconds(5),
search_indexing: :timer.seconds(5), search_indexing: :timer.seconds(5),
nodeinfo_fetcher: :timer.seconds(10) nodeinfo_fetcher: :timer.seconds(10),
database_prune: :timer.minutes(10)
] ]
config :pleroma, Pleroma.Formatter, config :pleroma, Pleroma.Formatter,

View file

@ -110,6 +110,14 @@ def run(["prune_objects" | args]) do
end end
end end
# Runs the database prune worker once, synchronously, and prints its result.
# The worker's `perform/1` is called with `nil` in place of a job.
def run(["prune_task"]) do
  start_pleroma()

  result = Pleroma.Workers.Cron.PruneDatabaseWorker.perform(nil)
  IO.inspect(result)
end
def run(["fix_likes_collections"]) do def run(["fix_likes_collections"]) do
start_pleroma() start_pleroma()

View file

@ -0,0 +1,23 @@
defmodule Pleroma.Activity.Pruner do
  @moduledoc """
  Prunes activities from the database.
  """

  import Ecto.Query

  alias Pleroma.Activity
  alias Pleroma.Repo

  # Age, in days, after which a `Delete` activity becomes eligible for pruning.
  @cutoff 30

  @doc """
  Deletes every activity whose `data` has `"type"` equal to `"Delete"` and
  whose `inserted_at` is older than the cutoff.

  The delete runs without a query timeout. Returns the result of
  `Repo.delete_all/2`.
  """
  def prune_deletes do
    threshold = cutoff()

    Activity
    |> where([a], fragment("?->>'type' = ?", a.data, "Delete"))
    |> where([a], a.inserted_at < ^threshold)
    |> Repo.delete_all(timeout: :infinity)
  end

  # The current UTC time shifted back by `@cutoff` days.
  defp cutoff do
    Timex.shift(DateTime.utc_now(), days: -@cutoff)
  end
end

View file

@ -0,0 +1,33 @@
defmodule Pleroma.Object.Pruner do
  @moduledoc """
  Prunes objects from the database.
  """

  # Age, in days, after which a tombstone becomes eligible for pruning.
  @cutoff 30

  alias Pleroma.Delivery
  alias Pleroma.Object
  alias Pleroma.Repo

  import Ecto.Query

  @doc """
  Deletes every delivery row whose object has `"type"` equal to `"Tombstone"`.

  No age cutoff is applied here: deliveries are removed regardless of how old
  the tombstone is. The delete runs without a query timeout. Returns the
  result of `Repo.delete_all/2`.
  """
  def prune_tombstoned_deliveries do
    from(d in Delivery)
    |> join(:inner, [d], o in Object, on: d.object_id == o.id)
    |> where([d, o], fragment("?->>'type' = ?", o.data, "Tombstone"))
    |> Repo.delete_all(timeout: :infinity)
  end

  @doc """
  Deletes every object whose `data` has `"type"` equal to `"Tombstone"` and
  whose `inserted_at` is older than the cutoff.

  The delete runs without a query timeout. Returns the result of
  `Repo.delete_all/2`.
  """
  def prune_tombstones do
    before_time = cutoff()

    # NOTE(review): `on_delete:` is an association/migration option, not a
    # `Repo.delete_all/2` option, so it is not passed here. Removal of rows
    # that reference these objects presumably relies on database-level
    # foreign key rules; confirm against the migrations.
    from(o in Object,
      where: fragment("?->>'type' = ?", o.data, "Tombstone") and o.inserted_at < ^before_time
    )
    |> Repo.delete_all(timeout: :infinity)
  end

  # The current UTC time shifted back by `@cutoff` days.
  defp cutoff do
    DateTime.utc_now() |> Timex.shift(days: -@cutoff)
  end
end

View file

@ -0,0 +1,26 @@
defmodule Pleroma.Workers.Cron.PruneDatabaseWorker do
  @moduledoc """
  The worker to prune old data from the database.

  Runs on the `database_prune` queue and executes each pruning step in a
  fixed order, logging a message before every step.
  """

  require Logger
  use Oban.Worker, queue: "database_prune"

  alias Pleroma.Activity.Pruner, as: ActivityPruner
  alias Pleroma.Object.Pruner, as: ObjectPruner

  @doc """
  Runs all pruning steps in sequence and returns `:ok`.

  The job argument is ignored.
  """
  @impl Oban.Worker
  def perform(_job) do
    Logger.info("Pruning old data from the database")

    # Steps run in list order; deliveries are pruned before the tombstones
    # they point at.
    [
      {"Pruning old deletes", &ActivityPruner.prune_deletes/0},
      {"Pruning old tombstone delivery entries", &ObjectPruner.prune_tombstoned_deliveries/0},
      {"Pruning old tombstones", &ObjectPruner.prune_tombstones/0}
    ]
    |> Enum.each(fn {message, prune} ->
      Logger.info(message)
      prune.()
    end)

    :ok
  end
end

View file

@ -0,0 +1,29 @@
defmodule Pleroma.Activity.PrunerTest do
  use Pleroma.DataCase, async: true

  alias Pleroma.Activity
  alias Pleroma.Activity.Pruner

  import Pleroma.Factory

  describe "prune_deletes" do
    # A fresh Delete activity must survive; one inserted 31 days ago is past
    # the pruner's 30-day cutoff and must be removed.
    test "it prunes old delete objects" do
      user = insert(:user)

      new_delete = insert(:delete_activity, type: "Delete", user: user)

      old_delete =
        insert(:delete_activity,
          type: "Delete",
          user: user,
          inserted_at: DateTime.utc_now() |> DateTime.add(-31 * 24, :hour)
        )

      Pruner.prune_deletes()

      assert Activity.get_by_id(new_delete.id)
      refute Activity.get_by_id(old_delete.id)
    end
  end
end

View file

@ -0,0 +1,24 @@
defmodule Pleroma.Object.PrunerTest do
  use Pleroma.DataCase, async: true

  alias Pleroma.Object
  alias Pleroma.Object.Pruner

  import Pleroma.Factory

  describe "prune_tombstones" do
    # A fresh tombstone must survive; one inserted 31 days ago is past the
    # pruner's 30-day cutoff and must be removed.
    test "it prunes old tombstone objects" do
      new_tombstone = insert(:tombstone)

      old_tombstone =
        insert(:tombstone,
          inserted_at: DateTime.utc_now() |> DateTime.add(-31 * 24, :hour)
        )

      Pruner.prune_tombstones()

      assert Object.get_by_id(new_tombstone.id)
      refute Object.get_by_id(old_tombstone.id)
    end
  end
end

View file

@ -233,7 +233,7 @@ def article_factory do
%Pleroma.Object{data: Map.merge(data, %{"type" => "Article"})} %Pleroma.Object{data: Map.merge(data, %{"type" => "Article"})}
end end
def tombstone_factory do def tombstone_factory(attrs) do
data = %{ data = %{
"type" => "Tombstone", "type" => "Tombstone",
"id" => Pleroma.Web.ActivityPub.Utils.generate_object_id(), "id" => Pleroma.Web.ActivityPub.Utils.generate_object_id(),
@ -244,6 +244,7 @@ def tombstone_factory do
%Pleroma.Object{ %Pleroma.Object{
data: data data: data
} }
|> merge_attributes(attrs)
end end
def question_factory(attrs \\ %{}) do def question_factory(attrs \\ %{}) do
@ -493,22 +494,22 @@ def report_activity_factory(attrs \\ %{}) do
} }
end end
def question_activity_factory(attrs \\ %{}) do def delete_activity_factory(attrs \\ %{}) do
user = attrs[:user] || insert(:user) user = attrs[:user] || insert(:user)
question = attrs[:question] || insert(:question, user: user) note_activity = attrs[:note_activity] || insert(:note_activity, user: user)
data_attrs = attrs[:data_attrs] || %{} data_attrs = attrs[:data_attrs] || %{}
attrs = Map.drop(attrs, [:user, :question, :data_attrs]) attrs = Map.drop(attrs, [:user, :data_attrs])
data = data =
%{ %{
"id" => Pleroma.Web.ActivityPub.Utils.generate_activity_id(), "id" => Pleroma.Web.ActivityPub.Utils.generate_activity_id(),
"type" => "Create", "type" => "Delete",
"actor" => question.data["actor"], "actor" => note_activity.data["actor"],
"to" => question.data["to"], "to" => note_activity.data["to"],
"object" => question.data["id"], "object" => note_activity.data["id"],
"published" => DateTime.utc_now() |> DateTime.to_iso8601(), "published" => DateTime.utc_now() |> DateTime.to_iso8601(),
"context" => question.data["context"] "context" => note_activity.data["context"]
} }
|> Map.merge(data_attrs) |> Map.merge(data_attrs)