[#3213] Reorganized hashtags cleanup. Transaction-wrapped Hashtag.get_or_create_by_names/1. Misc. improvements.
This commit is contained in:
parent
d1c6dd97aa
commit
a996ab46a5
6 changed files with 112 additions and 100 deletions
|
@ -560,7 +560,6 @@
|
||||||
],
|
],
|
||||||
plugins: [Oban.Plugins.Pruner],
|
plugins: [Oban.Plugins.Pruner],
|
||||||
crontab: [
|
crontab: [
|
||||||
{"0 1 * * *", Pleroma.Workers.Cron.HashtagsCleanupWorker},
|
|
||||||
{"0 0 * * 0", Pleroma.Workers.Cron.DigestEmailsWorker},
|
{"0 0 * * 0", Pleroma.Workers.Cron.DigestEmailsWorker},
|
||||||
{"0 0 * * *", Pleroma.Workers.Cron.NewUsersDigestWorker}
|
{"0 0 * * *", Pleroma.Workers.Cron.NewUsersDigestWorker}
|
||||||
]
|
]
|
||||||
|
|
|
@ -1964,7 +1964,6 @@
|
||||||
type: {:list, :tuple},
|
type: {:list, :tuple},
|
||||||
description: "Settings for cron background jobs",
|
description: "Settings for cron background jobs",
|
||||||
suggestions: [
|
suggestions: [
|
||||||
{"0 1 * * *", Pleroma.Workers.Cron.HashtagsCleanupWorker},
|
|
||||||
{"0 0 * * 0", Pleroma.Workers.Cron.DigestEmailsWorker},
|
{"0 0 * * 0", Pleroma.Workers.Cron.DigestEmailsWorker},
|
||||||
{"0 0 * * *", Pleroma.Workers.Cron.NewUsersDigestWorker}
|
{"0 0 * * *", Pleroma.Workers.Cron.NewUsersDigestWorker}
|
||||||
]
|
]
|
||||||
|
|
|
@ -6,14 +6,17 @@ defmodule Pleroma.Hashtag do
|
||||||
use Ecto.Schema
|
use Ecto.Schema
|
||||||
|
|
||||||
import Ecto.Changeset
|
import Ecto.Changeset
|
||||||
|
import Ecto.Query
|
||||||
|
|
||||||
|
alias Ecto.Multi
|
||||||
alias Pleroma.Hashtag
|
alias Pleroma.Hashtag
|
||||||
|
alias Pleroma.Object
|
||||||
alias Pleroma.Repo
|
alias Pleroma.Repo
|
||||||
|
|
||||||
schema "hashtags" do
|
schema "hashtags" do
|
||||||
field(:name, :string)
|
field(:name, :string)
|
||||||
|
|
||||||
many_to_many(:objects, Pleroma.Object, join_through: "hashtags_objects", on_replace: :delete)
|
many_to_many(:objects, Object, join_through: "hashtags_objects", on_replace: :delete)
|
||||||
|
|
||||||
timestamps()
|
timestamps()
|
||||||
end
|
end
|
||||||
|
@ -34,15 +37,27 @@ def get_or_create_by_name(name) when is_bitstring(name) do
|
||||||
end
|
end
|
||||||
|
|
||||||
def get_or_create_by_names(names) when is_list(names) do
|
def get_or_create_by_names(names) when is_list(names) do
|
||||||
Enum.reduce_while(names, {:ok, []}, fn name, {:ok, list} ->
|
timestamp = NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second)
|
||||||
case get_or_create_by_name(name) do
|
|
||||||
{:ok, %Hashtag{} = hashtag} ->
|
|
||||||
{:cont, {:ok, list ++ [hashtag]}}
|
|
||||||
|
|
||||||
error ->
|
structs =
|
||||||
{:halt, error}
|
Enum.map(names, fn name ->
|
||||||
end
|
%Hashtag{}
|
||||||
|
|> changeset(%{name: name})
|
||||||
|
|> Map.get(:changes)
|
||||||
|
|> Map.merge(%{inserted_at: timestamp, updated_at: timestamp})
|
||||||
end)
|
end)
|
||||||
|
|
||||||
|
with {:ok, %{query_op: hashtags}} <-
|
||||||
|
Multi.new()
|
||||||
|
|> Multi.insert_all(:insert_all_op, Hashtag, structs, on_conflict: :nothing)
|
||||||
|
|> Multi.run(:query_op, fn _repo, _changes ->
|
||||||
|
{:ok, Repo.all(from(ht in Hashtag, where: ht.name in ^names))}
|
||||||
|
end)
|
||||||
|
|> Repo.transaction() do
|
||||||
|
{:ok, hashtags}
|
||||||
|
else
|
||||||
|
{:error, _name, value, _changes_so_far} -> {:error, value}
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def changeset(%Hashtag{} = struct, params) do
|
def changeset(%Hashtag{} = struct, params) do
|
||||||
|
@ -52,4 +67,29 @@ def changeset(%Hashtag{} = struct, params) do
|
||||||
|> validate_required([:name])
|
|> validate_required([:name])
|
||||||
|> unique_constraint(:name)
|
|> unique_constraint(:name)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def unlink(%Object{id: object_id}) do
|
||||||
|
with {_, hashtag_ids} <-
|
||||||
|
from(hto in "hashtags_objects",
|
||||||
|
where: hto.object_id == ^object_id,
|
||||||
|
select: hto.hashtag_id
|
||||||
|
)
|
||||||
|
|> Repo.delete_all() do
|
||||||
|
delete_unreferenced(hashtag_ids)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
@delete_unreferenced_query """
|
||||||
|
DELETE FROM hashtags WHERE id IN
|
||||||
|
(SELECT hashtags.id FROM hashtags
|
||||||
|
LEFT OUTER JOIN hashtags_objects
|
||||||
|
ON hashtags_objects.hashtag_id = hashtags.id
|
||||||
|
WHERE hashtags_objects.hashtag_id IS NULL AND hashtags.id = ANY($1));
|
||||||
|
"""
|
||||||
|
|
||||||
|
def delete_unreferenced(ids) do
|
||||||
|
with {:ok, %{num_rows: deleted_count}} <- Repo.query(@delete_unreferenced_query, [ids]) do
|
||||||
|
{:ok, deleted_count}
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -74,16 +74,15 @@ def handle_continue(:init_state, _state) do
|
||||||
def handle_info(:migrate_hashtags, state) do
|
def handle_info(:migrate_hashtags, state) do
|
||||||
State.clear()
|
State.clear()
|
||||||
|
|
||||||
data_migration = data_migration()
|
update_status(:running)
|
||||||
|
put_stat(:started_at, NaiveDateTime.utc_now())
|
||||||
|
|
||||||
|
data_migration = data_migration()
|
||||||
persistent_data = Map.take(data_migration.data, ["max_processed_id"])
|
persistent_data = Map.take(data_migration.data, ["max_processed_id"])
|
||||||
|
|
||||||
{:ok, data_migration} =
|
{:ok, data_migration} =
|
||||||
DataMigration.update(data_migration, %{state: :running, data: persistent_data})
|
DataMigration.update(data_migration, %{state: :running, data: persistent_data})
|
||||||
|
|
||||||
update_status(:running)
|
|
||||||
put_stat(:started_at, NaiveDateTime.utc_now())
|
|
||||||
|
|
||||||
Logger.info("Starting transferring object embedded hashtags to `hashtags` table...")
|
Logger.info("Starting transferring object embedded hashtags to `hashtags` table...")
|
||||||
|
|
||||||
max_processed_id = data_migration.data["max_processed_id"] || 0
|
max_processed_id = data_migration.data["max_processed_id"] || 0
|
||||||
|
@ -137,6 +136,8 @@ def handle_info(:migrate_hashtags, state) do
|
||||||
|> Stream.run()
|
|> Stream.run()
|
||||||
|
|
||||||
with 0 <- failures_count(data_migration.id) do
|
with 0 <- failures_count(data_migration.id) do
|
||||||
|
_ = delete_non_create_activities_hashtags()
|
||||||
|
|
||||||
{:ok, data_migration} = DataMigration.update_state(data_migration, :complete)
|
{:ok, data_migration} = DataMigration.update_state(data_migration, :complete)
|
||||||
|
|
||||||
handle_success(data_migration)
|
handle_success(data_migration)
|
||||||
|
@ -150,9 +151,37 @@ def handle_info(:migrate_hashtags, state) do
|
||||||
{:noreply, state}
|
{:noreply, state}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@hashtags_objects_cleanup_query """
|
||||||
|
DELETE FROM hashtags_objects WHERE object_id IN
|
||||||
|
(SELECT DISTINCT objects.id FROM objects
|
||||||
|
JOIN hashtags_objects ON hashtags_objects.object_id = objects.id LEFT JOIN activities
|
||||||
|
ON COALESCE(activities.data->'object'->>'id', activities.data->>'object') =
|
||||||
|
(objects.data->>'id')
|
||||||
|
AND activities.data->>'type' = 'Create'
|
||||||
|
WHERE activities.id IS NULL);
|
||||||
|
"""
|
||||||
|
|
||||||
|
@hashtags_cleanup_query """
|
||||||
|
DELETE FROM hashtags WHERE id IN
|
||||||
|
(SELECT hashtags.id FROM hashtags
|
||||||
|
LEFT OUTER JOIN hashtags_objects
|
||||||
|
ON hashtags_objects.hashtag_id = hashtags.id
|
||||||
|
WHERE hashtags_objects.hashtag_id IS NULL);
|
||||||
|
"""
|
||||||
|
|
||||||
|
def delete_non_create_activities_hashtags do
|
||||||
|
{:ok, %{num_rows: hashtags_objects_count}} =
|
||||||
|
Repo.query(@hashtags_objects_cleanup_query, [], timeout: :infinity)
|
||||||
|
|
||||||
|
{:ok, %{num_rows: hashtags_count}} =
|
||||||
|
Repo.query(@hashtags_cleanup_query, [], timeout: :infinity)
|
||||||
|
|
||||||
|
{:ok, hashtags_objects_count, hashtags_count}
|
||||||
|
end
|
||||||
|
|
||||||
defp query do
|
defp query do
|
||||||
# Note: most objects have Mention-type AS2 tags and no hashtags (but we can't filter them out)
|
# Note: most objects have Mention-type AS2 tags and no hashtags (but we can't filter them out)
|
||||||
# Note: not checking activity type; HashtagsCleanupWorker should clean up unused records later
|
# Note: not checking activity type, expecting delete_non_create_activities_hashtags/0 to clean up
|
||||||
from(
|
from(
|
||||||
object in Object,
|
object in Object,
|
||||||
where:
|
where:
|
||||||
|
@ -182,25 +211,20 @@ defp transfer_object_hashtags(object) do
|
||||||
defp transfer_object_hashtags(object, hashtags) do
|
defp transfer_object_hashtags(object, hashtags) do
|
||||||
Repo.transaction(fn ->
|
Repo.transaction(fn ->
|
||||||
with {:ok, hashtag_records} <- Hashtag.get_or_create_by_names(hashtags) do
|
with {:ok, hashtag_records} <- Hashtag.get_or_create_by_names(hashtags) do
|
||||||
for hashtag_record <- hashtag_records do
|
maps = Enum.map(hashtag_records, &%{hashtag_id: &1.id, object_id: object.id})
|
||||||
with {:ok, _} <-
|
expected_rows = length(hashtag_records)
|
||||||
Repo.query(
|
|
||||||
"insert into hashtags_objects(hashtag_id, object_id) values ($1, $2);",
|
with {^expected_rows, _} <- Repo.insert_all("hashtags_objects", maps) do
|
||||||
[hashtag_record.id, object.id]
|
object.id
|
||||||
) do
|
|
||||||
nil
|
|
||||||
else
|
else
|
||||||
{:error, e} ->
|
e ->
|
||||||
error =
|
error =
|
||||||
"ERROR: could not link object #{object.id} and hashtag " <>
|
"ERROR when inserting #{expected_rows} hashtags_objects " <>
|
||||||
"#{hashtag_record.id}: #{inspect(e)}"
|
"for object #{object.id}: #{inspect(e)}"
|
||||||
|
|
||||||
Logger.error(error)
|
Logger.error(error)
|
||||||
Repo.rollback(object.id)
|
Repo.rollback(object.id)
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
object.id
|
|
||||||
else
|
else
|
||||||
e ->
|
e ->
|
||||||
error = "ERROR: could not create hashtags for object #{object.id}: #{inspect(e)}"
|
error = "ERROR: could not create hashtags for object #{object.id}: #{inspect(e)}"
|
||||||
|
|
|
@ -62,27 +62,30 @@ def change(struct, params \\ %{}) do
|
||||||
|> cast(params, [:data])
|
|> cast(params, [:data])
|
||||||
|> validate_required([:data])
|
|> validate_required([:data])
|
||||||
|> unique_constraint(:ap_id, name: :objects_unique_apid_index)
|
|> unique_constraint(:ap_id, name: :objects_unique_apid_index)
|
||||||
|
# Expecting `maybe_handle_hashtags_change/1` to run last:
|
||||||
|> maybe_handle_hashtags_change(struct)
|
|> maybe_handle_hashtags_change(struct)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Note: not checking activity type; HashtagsCleanupWorker should clean up unused records later
|
# Note: not checking activity type (assuming non-legacy objects are associated with Create activities)
|
||||||
defp maybe_handle_hashtags_change(changeset, struct) do
|
defp maybe_handle_hashtags_change(changeset, struct) do
|
||||||
with data_hashtags_change = get_change(changeset, :data),
|
with %Ecto.Changeset{valid?: true} <- changeset,
|
||||||
true <- hashtags_changed?(struct, data_hashtags_change),
|
data_hashtags_change = get_change(changeset, :data),
|
||||||
|
{_, true} <- {:changed, hashtags_changed?(struct, data_hashtags_change)},
|
||||||
{:ok, hashtag_records} <-
|
{:ok, hashtag_records} <-
|
||||||
data_hashtags_change
|
data_hashtags_change
|
||||||
|> object_data_hashtags()
|
|> object_data_hashtags()
|
||||||
|> Hashtag.get_or_create_by_names() do
|
|> Hashtag.get_or_create_by_names() do
|
||||||
put_assoc(changeset, :hashtags, hashtag_records)
|
put_assoc(changeset, :hashtags, hashtag_records)
|
||||||
else
|
else
|
||||||
false ->
|
%{valid?: false} ->
|
||||||
changeset
|
changeset
|
||||||
|
|
||||||
{:error, hashtag_changeset} ->
|
{:changed, false} ->
|
||||||
failed_hashtag = get_field(hashtag_changeset, :name)
|
changeset
|
||||||
|
|
||||||
|
{:error, _} ->
|
||||||
validate_change(changeset, :data, fn _, _ ->
|
validate_change(changeset, :data, fn _, _ ->
|
||||||
[data: "error referencing hashtag: #{failed_hashtag}"]
|
[data: "error referencing hashtags"]
|
||||||
end)
|
end)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -221,9 +224,13 @@ def make_tombstone(%Object{data: %{"id" => id, "type" => type}}, deleted \\ Date
|
||||||
def swap_object_with_tombstone(object) do
|
def swap_object_with_tombstone(object) do
|
||||||
tombstone = make_tombstone(object)
|
tombstone = make_tombstone(object)
|
||||||
|
|
||||||
|
with {:ok, object} <-
|
||||||
object
|
object
|
||||||
|> Object.change(%{data: tombstone})
|
|> Object.change(%{data: tombstone})
|
||||||
|> Repo.update()
|
|> Repo.update() do
|
||||||
|
Hashtag.unlink(object)
|
||||||
|
{:ok, object}
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def delete(%Object{data: %{"id" => id}} = object) do
|
def delete(%Object{data: %{"id" => id}} = object) do
|
||||||
|
|
|
@ -1,57 +0,0 @@
|
||||||
# Pleroma: A lightweight social networking server
|
|
||||||
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
|
|
||||||
# SPDX-License-Identifier: AGPL-3.0-only
|
|
||||||
|
|
||||||
defmodule Pleroma.Workers.Cron.HashtagsCleanupWorker do
|
|
||||||
@moduledoc """
|
|
||||||
The worker to clean up unused hashtags_objects and hashtags.
|
|
||||||
"""
|
|
||||||
|
|
||||||
use Oban.Worker, queue: "hashtags_cleanup"
|
|
||||||
|
|
||||||
alias Pleroma.Repo
|
|
||||||
|
|
||||||
require Logger
|
|
||||||
|
|
||||||
@hashtags_objects_query """
|
|
||||||
DELETE FROM hashtags_objects WHERE object_id IN
|
|
||||||
(SELECT DISTINCT objects.id FROM objects
|
|
||||||
JOIN hashtags_objects ON hashtags_objects.object_id = objects.id LEFT JOIN activities
|
|
||||||
ON COALESCE(activities.data->'object'->>'id', activities.data->>'object') =
|
|
||||||
(objects.data->>'id')
|
|
||||||
AND activities.data->>'type' = 'Create'
|
|
||||||
WHERE activities.id IS NULL);
|
|
||||||
"""
|
|
||||||
|
|
||||||
@hashtags_query """
|
|
||||||
DELETE FROM hashtags WHERE id IN
|
|
||||||
(SELECT hashtags.id FROM hashtags
|
|
||||||
LEFT OUTER JOIN hashtags_objects
|
|
||||||
ON hashtags_objects.hashtag_id = hashtags.id
|
|
||||||
WHERE hashtags_objects.hashtag_id IS NULL AND hashtags.inserted_at < $1);
|
|
||||||
"""
|
|
||||||
|
|
||||||
@impl Oban.Worker
|
|
||||||
def perform(_job) do
|
|
||||||
Logger.info("Cleaning up unused `hashtags_objects` records...")
|
|
||||||
|
|
||||||
{:ok, %{num_rows: hashtags_objects_count}} =
|
|
||||||
Repo.query(@hashtags_objects_query, [], timeout: :infinity)
|
|
||||||
|
|
||||||
Logger.info("Deleted #{hashtags_objects_count} unused `hashtags_objects` records.")
|
|
||||||
|
|
||||||
Logger.info("Cleaning up unused `hashtags` records...")
|
|
||||||
|
|
||||||
# Note: ignoring recently created hashtags since references are added after hashtag is created
|
|
||||||
{:ok, %{num_rows: hashtags_count}} =
|
|
||||||
Repo.query(@hashtags_query, [NaiveDateTime.add(NaiveDateTime.utc_now(), -3600 * 24)],
|
|
||||||
timeout: :infinity
|
|
||||||
)
|
|
||||||
|
|
||||||
Logger.info("Deleted #{hashtags_count} unused `hashtags` records.")
|
|
||||||
|
|
||||||
Logger.info("HashtagsCleanupWorker complete.")
|
|
||||||
|
|
||||||
:ok
|
|
||||||
end
|
|
||||||
end
|
|
Loading…
Reference in a new issue