Log number of deleted rows in prune_orphaned_activities

This gives feedback on when to stop rerunning limited batches.

Most of the diff is just adjusting indentation; best reviewed
with whitespace-only changes hidden, e.g. `git diff -w`.
This commit is contained in:
Oneric 2023-10-23 01:27:56 +02:00
parent fa52093bac
commit e64f031167
2 changed files with 44 additions and 33 deletions

View file

@ -59,6 +59,11 @@ This will prune activities which are no longer referenced by anything.
Such activities might be the result of running `prune_objects` without `--prune-orphaned-activities`. Such activities might be the result of running `prune_objects` without `--prune-orphaned-activities`.
The same notes and warnings apply as for `prune_objects`. The same notes and warnings apply as for `prune_objects`.
The task prints the total number of deleted rows in its last
line of output, in the form `Deleted 345 rows`.
When running the job in limited batches, this can be used to determine
when all orphaned activities have been deleted: a run that reports `Deleted 0 rows` found nothing left to prune.
=== "OTP" === "OTP"
```sh ```sh

View file

@ -29,40 +29,44 @@ def prune_orphaned_activities(limit \\ 0) when is_number(limit) do
end end
# Prune activities who link to a single object # Prune activities who link to a single object
""" {:ok, %{:num_rows => del_single}} =
delete from public.activities """
where id in ( delete from public.activities
select a.id from public.activities a where id in (
left join public.objects o on a.data ->> 'object' = o.data ->> 'id' select a.id from public.activities a
left join public.activities a2 on a.data ->> 'object' = a2.data ->> 'id' left join public.objects o on a.data ->> 'object' = o.data ->> 'id'
left join public.users u on a.data ->> 'object' = u.ap_id left join public.activities a2 on a.data ->> 'object' = a2.data ->> 'id'
where not a.local left join public.users u on a.data ->> 'object' = u.ap_id
and jsonb_typeof(a."data" -> 'object') = 'string' where not a.local
and o.id is null and jsonb_typeof(a."data" -> 'object') = 'string'
and a2.id is null and o.id is null
and u.id is null and a2.id is null
#{limit_arg} and u.id is null
) #{limit_arg}
""" )
|> Repo.query([], timeout: :infinity) """
|> Repo.query([], timeout: :infinity)
# Prune activities who link to an array of objects # Prune activities who link to an array of objects
""" {:ok, %{:num_rows => del_array}} =
delete from public.activities """
where id in ( delete from public.activities
select a.id from public.activities a where id in (
join json_array_elements_text((a."data" -> 'object')::json) as j on jsonb_typeof(a."data" -> 'object') = 'array' select a.id from public.activities a
left join public.objects o on j.value = o.data ->> 'id' join json_array_elements_text((a."data" -> 'object')::json) as j on jsonb_typeof(a."data" -> 'object') = 'array'
left join public.activities a2 on j.value = a2.data ->> 'id' left join public.objects o on j.value = o.data ->> 'id'
left join public.users u on j.value = u.ap_id left join public.activities a2 on j.value = a2.data ->> 'id'
group by a.id left join public.users u on j.value = u.ap_id
having max(o.data ->> 'id') is null group by a.id
and max(a2.data ->> 'id') is null having max(o.data ->> 'id') is null
and max(u.ap_id) is null and max(a2.data ->> 'id') is null
#{limit_arg} and max(u.ap_id) is null
) #{limit_arg}
""" )
|> Repo.query([], timeout: :infinity) """
|> Repo.query([], timeout: :infinity)
del_single + del_array
end end
def run(["remove_embedded_objects" | args]) do def run(["remove_embedded_objects" | args]) do
@ -131,7 +135,9 @@ def run(["prune_orphaned_activities" | args]) do
Logger.info(log_message) Logger.info(log_message)
prune_orphaned_activities(limit) deleted = prune_orphaned_activities(limit)
Logger.info("Deleted #{deleted} rows")
end end
def run(["prune_objects" | args]) do def run(["prune_objects" | args]) do