diff --git a/config/config.exs b/config/config.exs index c4a690799..dfd2fc434 100644 --- a/config/config.exs +++ b/config/config.exs @@ -553,10 +553,12 @@ remote_fetcher: 2, attachments_cleanup: 1, new_users_digest: 1, + hashtags_cleanup: 1, mute_expire: 5 ], plugins: [Oban.Plugins.Pruner], crontab: [ + {"0 1 * * *", Pleroma.Workers.Cron.HashtagsCleanupWorker}, {"0 0 * * 0", Pleroma.Workers.Cron.DigestEmailsWorker}, {"0 0 * * *", Pleroma.Workers.Cron.NewUsersDigestWorker} ] diff --git a/config/description.exs b/config/description.exs index 46f085c70..147c1930c 100644 --- a/config/description.exs +++ b/config/description.exs @@ -1943,6 +1943,7 @@ type: {:list, :tuple}, description: "Settings for cron background jobs", suggestions: [ + {"0 1 * * *", Pleroma.Workers.Cron.HashtagsCleanupWorker}, {"0 0 * * 0", Pleroma.Workers.Cron.DigestEmailsWorker}, {"0 0 * * *", Pleroma.Workers.Cron.NewUsersDigestWorker} ] diff --git a/lib/pleroma/migrators/hashtags_table_migrator.ex b/lib/pleroma/migrators/hashtags_table_migrator.ex index 6a1c9592c..07b42a7f4 100644 --- a/lib/pleroma/migrators/hashtags_table_migrator.ex +++ b/lib/pleroma/migrators/hashtags_table_migrator.ex @@ -152,6 +152,7 @@ def handle_info(:migrate_hashtags, state) do defp query do # Note: most objects have Mention-type AS2 tags and no hashtags (but we can't filter them out) + # Note: not checking activity type; HashtagsCleanupWorker should clean up unused records later from( object in Object, where: diff --git a/lib/pleroma/object.ex b/lib/pleroma/object.ex index 9edf43e04..52b77e41c 100644 --- a/lib/pleroma/object.ex +++ b/lib/pleroma/object.ex @@ -65,6 +65,7 @@ def change(struct, params \\ %{}) do |> maybe_handle_hashtags_change(struct) end + # Note: not checking activity type; HashtagsCleanupWorker should clean up unused records later defp maybe_handle_hashtags_change(changeset, struct) do with data_hashtags_change = get_change(changeset, :data), true <- hashtags_changed?(struct, data_hashtags_change), diff --git a/lib/pleroma/workers/cron/hashtags_cleanup_worker.ex b/lib/pleroma/workers/cron/hashtags_cleanup_worker.ex new file mode 100644 index 000000000..b319067ca --- /dev/null +++ b/lib/pleroma/workers/cron/hashtags_cleanup_worker.ex @@ -0,0 +1,57 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2021 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Workers.Cron.HashtagsCleanupWorker do + @moduledoc """ + The worker to clean up unused hashtags_objects and hashtags. + """ + + use Oban.Worker, queue: "hashtags_cleanup" + + alias Pleroma.Repo + + require Logger + + @hashtags_objects_query """ + DELETE FROM hashtags_objects WHERE object_id IN + (SELECT DISTINCT objects.id FROM objects + JOIN hashtags_objects ON hashtags_objects.object_id = objects.id LEFT JOIN activities + ON COALESCE(activities.data->'object'->>'id', activities.data->>'object') = + (objects.data->>'id') + AND activities.data->>'type' = 'Create' + WHERE activities.id IS NULL); + """ + + @hashtags_query """ + DELETE FROM hashtags WHERE id IN + (SELECT hashtags.id FROM hashtags + LEFT OUTER JOIN hashtags_objects + ON hashtags_objects.hashtag_id = hashtags.id + WHERE hashtags_objects.hashtag_id IS NULL AND hashtags.inserted_at < $1); + """ + + @impl Oban.Worker + def perform(_job) do + Logger.info("Cleaning up unused `hashtags_objects` records...") + + {:ok, %{num_rows: hashtags_objects_count}} = + Repo.query(@hashtags_objects_query, [], timeout: :infinity) + + Logger.info("Deleted #{hashtags_objects_count} unused `hashtags_objects` records.") + + Logger.info("Cleaning up unused `hashtags` records...") + + # Note: ignoring recently created hashtags since references are added after hashtag is created + {:ok, %{num_rows: hashtags_count}} = + Repo.query(@hashtags_query, [NaiveDateTime.add(NaiveDateTime.utc_now(), -3600 * 24)], + timeout: :infinity + ) + + Logger.info("Deleted #{hashtags_count} unused `hashtags` records.") + + Logger.info("HashtagsCleanupWorker complete.") + + :ok + end +end