From 3126d15ffc54fc0f67ac9c903aa04931e7961f4e Mon Sep 17 00:00:00 2001 From: Oneric Date: Mon, 23 Oct 2023 00:52:34 +0200 Subject: [PATCH 01/11] refactor: move prune_orphaned_activities into own function No logic changes. Preparation for standalone orphan pruning. --- lib/mix/tasks/pleroma/database.ex | 70 ++++++++++++++++--------------- 1 file changed, 37 insertions(+), 33 deletions(-) diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index 09d2a4072..20d035dfd 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -20,6 +20,42 @@ defmodule Mix.Tasks.Pleroma.Database do @shortdoc "A collection of database related tasks" @moduledoc File.read!("docs/docs/administration/CLI_tasks/database.md") + def prune_orphaned_activities() do + # Prune activities who link to a single object + """ + delete from public.activities + where id in ( + select a.id from public.activities a + left join public.objects o on a.data ->> 'object' = o.data ->> 'id' + left join public.activities a2 on a.data ->> 'object' = a2.data ->> 'id' + left join public.users u on a.data ->> 'object' = u.ap_id + where not a.local + and jsonb_typeof(a."data" -> 'object') = 'string' + and o.id is null + and a2.id is null + and u.id is null + ) + """ + |> Repo.query([], timeout: :infinity) + + # Prune activities who link to an array of objects + """ + delete from public.activities + where id in ( + select a.id from public.activities a + join json_array_elements_text((a."data" -> 'object')::json) as j on jsonb_typeof(a."data" -> 'object') = 'array' + left join public.objects o on j.value = o.data ->> 'id' + left join public.activities a2 on j.value = a2.data ->> 'id' + left join public.users u on j.value = u.ap_id + group by a.id + having max(o.data ->> 'id') is null + and max(a2.data ->> 'id') is null + and max(u.ap_id) is null + ) + """ + |> Repo.query([], timeout: :infinity) + end + def run(["remove_embedded_objects" | args]) do {options, 
[], []} = OptionParser.parse( @@ -187,39 +223,7 @@ def run(["prune_objects" | args]) do end if Keyword.get(options, :prune_orphaned_activities) do - # Prune activities who link to a single object - """ - delete from public.activities - where id in ( - select a.id from public.activities a - left join public.objects o on a.data ->> 'object' = o.data ->> 'id' - left join public.activities a2 on a.data ->> 'object' = a2.data ->> 'id' - left join public.users u on a.data ->> 'object' = u.ap_id - where not a.local - and jsonb_typeof(a."data" -> 'object') = 'string' - and o.id is null - and a2.id is null - and u.id is null - ) - """ - |> Repo.query([], timeout: :infinity) - - # Prune activities who link to an array of objects - """ - delete from public.activities - where id in ( - select a.id from public.activities a - join json_array_elements_text((a."data" -> 'object')::json) as j on jsonb_typeof(a."data" -> 'object') = 'array' - left join public.objects o on j.value = o.data ->> 'id' - left join public.activities a2 on j.value = a2.data ->> 'id' - left join public.users u on j.value = u.ap_id - group by a.id - having max(o.data ->> 'id') is null - and max(a2.data ->> 'id') is null - and max(u.ap_id) is null - ) - """ - |> Repo.query([], timeout: :infinity) + prune_orphaned_activities() end """ -- 2.43.0 From fa52093bacc106945bf28b1fae98535ab5803151 Mon Sep 17 00:00:00 2001 From: Oneric Date: Mon, 23 Oct 2023 01:01:07 +0200 Subject: [PATCH 02/11] Add standalone prune_orphaned_activities CLI task This part of pruning can be very expensive and bog down the whole instance to an unusable state for a long time. It can thus be desirable to split it from prune_objects and run it on its own in smaller limited batches. If the batches are small enough and spaced out a bit, it may even be possible to avoid any downtime. If not, the limit can still help to at least make the downtime duration somewhat more predictable. 
--- CHANGELOG.md | 1 + .../docs/administration/CLI_tasks/database.md | 22 +++++++++++ lib/mix/tasks/pleroma/database.ex | 38 ++++++++++++++++++- 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a272d4a2..42411f491 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -106,6 +106,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Akkoma API is now documented - ability to auto-approve follow requests from users you are already following - The SimplePolicy MRF can now strip user backgrounds from selected remote hosts +- New standalone `prune_orphaned_activities` mix task with configurable batch limit ## Changed - OTP builds are now built on erlang OTP26 diff --git a/docs/docs/administration/CLI_tasks/database.md b/docs/docs/administration/CLI_tasks/database.md index 3d7424d1c..eba56da10 100644 --- a/docs/docs/administration/CLI_tasks/database.md +++ b/docs/docs/administration/CLI_tasks/database.md @@ -53,6 +53,28 @@ This will prune remote posts older than 90 days (configurable with [`config :ple - `--prune-orphaned-activities` - Also prune orphaned activities afterwards. Activities are things like Like, Create, Announce, Flag (aka reports)... They can significantly help reduce the database size. - `--vacuum` - Run `VACUUM FULL` after the objects are pruned. This should not be used on a regular basis, but is useful if your instance has been running for a long time before pruning. +## Prune orphaned activities from the database + +This will prune activities which are no longer referenced by anything. +Such activities might be the result of running `prune_objects` without `--prune-orphaned-activities`. +The same notes and warnings apply as for `prune_objects`. + +=== "OTP" + + ```sh + ./bin/pleroma_ctl database prune_orphaned_activities [option ...] + ``` + +=== "From Source" + + ```sh + mix pleroma.database prune_orphaned_activities [option ...] 
+ ``` + +### Options + +- `--limit n` - Only delete up to `n` activities in each query making up this job, i.e. if this job runs two queries at most `2n` activities will be deleted. Running this task repeatedly in limited batches can help maintain the instance’s responsiveness while still freeing up some space. + ## Create a conversation for all existing DMs Can be safely re-run diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index 20d035dfd..8ded7bbec 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -20,7 +20,14 @@ defmodule Mix.Tasks.Pleroma.Database do @shortdoc "A collection of database related tasks" @moduledoc File.read!("docs/docs/administration/CLI_tasks/database.md") - def prune_orphaned_activities() do + def prune_orphaned_activities(limit \\ 0) when is_number(limit) do + limit_arg = + if limit > 0 do + "LIMIT #{limit}" + else + "" + end + # Prune activities who link to a single object """ delete from public.activities @@ -34,6 +41,7 @@ def prune_orphaned_activities() do and o.id is null and a2.id is null and u.id is null + #{limit_arg} ) """ |> Repo.query([], timeout: :infinity) @@ -51,6 +59,7 @@ def prune_orphaned_activities() do having max(o.data ->> 'id') is null and max(a2.data ->> 'id') is null and max(u.ap_id) is null + #{limit_arg} ) """ |> Repo.query([], timeout: :infinity) @@ -98,6 +107,33 @@ def run(["update_users_following_followers_counts"]) do ) end + def run(["prune_orphaned_activities" | args]) do + {options, [], []} = + OptionParser.parse( + args, + strict: [ + limit: :integer + ] + ) + + start_pleroma() + + limit = Keyword.get(options, :limit, 0) + + log_message = "Pruning orphaned activities" + + log_message = + if limit > 0 do + log_message <> ", limiting deletion to #{limit} rows" + else + log_message + end + + Logger.info(log_message) + + prune_orphaned_activities(limit) + end + def run(["prune_objects" | args]) do {options, [], []} = OptionParser.parse( -- 
2.43.0 From e64f031167a855b36298dbda988d6bde210299a4 Mon Sep 17 00:00:00 2001 From: Oneric Date: Mon, 23 Oct 2023 01:27:56 +0200 Subject: [PATCH 03/11] Log number of deleted rows in prune_orphaned_activities This gives feedback when to stop rerunning limited batches. Most of the diff is just adjusting indentation; best reviewed with whitespace-only changes hidden, e.g. `git diff -w`. --- .../docs/administration/CLI_tasks/database.md | 5 ++ lib/mix/tasks/pleroma/database.ex | 72 ++++++++++--------- 2 files changed, 44 insertions(+), 33 deletions(-) diff --git a/docs/docs/administration/CLI_tasks/database.md b/docs/docs/administration/CLI_tasks/database.md index eba56da10..c57817bf4 100644 --- a/docs/docs/administration/CLI_tasks/database.md +++ b/docs/docs/administration/CLI_tasks/database.md @@ -59,6 +59,11 @@ This will prune activities which are no longer referenced by anything. Such activities might be the result of running `prune_objects` without `--prune-orphaned-activities`. The same notes and warnings apply as for `prune_objects`. +The task will print out how many rows were freed in total in its last +line of output in the form `Deleted 345 rows`. +When running the job in limited batches this can be used to determine +when all orphaned activities have been deleted. 
+ === "OTP" ```sh diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index 8ded7bbec..083f73fe2 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -29,40 +29,44 @@ def prune_orphaned_activities(limit \\ 0) when is_number(limit) do end # Prune activities who link to a single object - """ - delete from public.activities - where id in ( - select a.id from public.activities a - left join public.objects o on a.data ->> 'object' = o.data ->> 'id' - left join public.activities a2 on a.data ->> 'object' = a2.data ->> 'id' - left join public.users u on a.data ->> 'object' = u.ap_id - where not a.local - and jsonb_typeof(a."data" -> 'object') = 'string' - and o.id is null - and a2.id is null - and u.id is null - #{limit_arg} - ) - """ - |> Repo.query([], timeout: :infinity) + {:ok, %{:num_rows => del_single}} = + """ + delete from public.activities + where id in ( + select a.id from public.activities a + left join public.objects o on a.data ->> 'object' = o.data ->> 'id' + left join public.activities a2 on a.data ->> 'object' = a2.data ->> 'id' + left join public.users u on a.data ->> 'object' = u.ap_id + where not a.local + and jsonb_typeof(a."data" -> 'object') = 'string' + and o.id is null + and a2.id is null + and u.id is null + #{limit_arg} + ) + """ + |> Repo.query([], timeout: :infinity) # Prune activities who link to an array of objects - """ - delete from public.activities - where id in ( - select a.id from public.activities a - join json_array_elements_text((a."data" -> 'object')::json) as j on jsonb_typeof(a."data" -> 'object') = 'array' - left join public.objects o on j.value = o.data ->> 'id' - left join public.activities a2 on j.value = a2.data ->> 'id' - left join public.users u on j.value = u.ap_id - group by a.id - having max(o.data ->> 'id') is null - and max(a2.data ->> 'id') is null - and max(u.ap_id) is null - #{limit_arg} - ) - """ - |> Repo.query([], timeout: :infinity) + {:ok, 
%{:num_rows => del_array}} = + """ + delete from public.activities + where id in ( + select a.id from public.activities a + join json_array_elements_text((a."data" -> 'object')::json) as j on jsonb_typeof(a."data" -> 'object') = 'array' + left join public.objects o on j.value = o.data ->> 'id' + left join public.activities a2 on j.value = a2.data ->> 'id' + left join public.users u on j.value = u.ap_id + group by a.id + having max(o.data ->> 'id') is null + and max(a2.data ->> 'id') is null + and max(u.ap_id) is null + #{limit_arg} + ) + """ + |> Repo.query([], timeout: :infinity) + + del_single + del_array end def run(["remove_embedded_objects" | args]) do @@ -131,7 +135,9 @@ def run(["prune_orphaned_activities" | args]) do Logger.info(log_message) - prune_orphaned_activities(limit) + deleted = prune_orphaned_activities(limit) + + Logger.info("Deleted #{deleted} rows") end def run(["prune_objects" | args]) do -- 2.43.0 From 225f87ad62902cce8db71d30c23801fd9ed7ee05 Mon Sep 17 00:00:00 2001 From: Oneric Date: Mon, 23 Oct 2023 17:29:02 +0200 Subject: [PATCH 04/11] Also allow limiting the initial prune_object May sometimes be helpful to get more predictable runtime than just with an age-based limit. The subquery for the non-keep-threads path is required since delete_all does not directly accept limit(). Again most of the diff is just adjusting indentation, best hide whitespace-only changes with git diff -w or similar. --- CHANGELOG.md | 1 + .../docs/administration/CLI_tasks/database.md | 1 + lib/mix/tasks/pleroma/database.ex | 61 +++++++++++++------ 3 files changed, 45 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 42411f491..aea6dc677 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -107,6 +107,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
- ability to auto-approve follow requests from users you are already following - The SimplePolicy MRF can now strip user backgrounds from selected remote hosts - New standalone `prune_orphaned_activities` mix task with configurable batch limit +- The `prune_objects` mix task now accepts a `--limit` parameter for initial object pruning ## Changed - OTP builds are now built on erlang OTP26 diff --git a/docs/docs/administration/CLI_tasks/database.md b/docs/docs/administration/CLI_tasks/database.md index c57817bf4..bbf29fc60 100644 --- a/docs/docs/administration/CLI_tasks/database.md +++ b/docs/docs/administration/CLI_tasks/database.md @@ -50,6 +50,7 @@ This will prune remote posts older than 90 days (configurable with [`config :ple - `--keep-threads` - Don't prune posts when they are part of a thread where at least one post has seen local interaction (e.g. one of the posts is a local post, or is favourited by a local user, or has been repeated by a local user...). It also wont delete posts when at least one of the posts in that thread is kept (e.g. because one of the posts has seen recent activity). - `--keep-non-public` - Keep non-public posts like DM's and followers-only, even if they are remote. +- `--limit` - limits how many remote posts get pruned. This limit does **not** apply to any of the follow up jobs. If wanting to keep the database load in check it is thus advisable to run the standalone `prune_orphaned_activities` task with a limit afterwards instead of passing `--prune-orphaned-activities` to this task. - `--prune-orphaned-activities` - Also prune orphaned activities afterwards. Activities are things like Like, Create, Announce, Flag (aka reports)... They can significantly help reduce the database size. - `--vacuum` - Run `VACUUM FULL` after the objects are pruned. This should not be used on a regular basis, but is useful if your instance has been running for a long time before pruning. 
diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index 083f73fe2..b8f19551a 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -20,6 +20,14 @@ defmodule Mix.Tasks.Pleroma.Database do @shortdoc "A collection of database related tasks" @moduledoc File.read!("docs/docs/administration/CLI_tasks/database.md") + defp maybe_limit(query, limit_cnt) do + if is_number(limit_cnt) and limit_cnt > 0 do + limit(query, [], ^limit_cnt) + else + query + end + end + def prune_orphaned_activities(limit \\ 0) when is_number(limit) do limit_arg = if limit > 0 do @@ -148,7 +156,8 @@ def run(["prune_objects" | args]) do vacuum: :boolean, keep_threads: :boolean, keep_non_public: :boolean, - prune_orphaned_activities: :boolean + prune_orphaned_activities: :boolean, + limit: :integer ] ) @@ -157,6 +166,8 @@ def run(["prune_objects" | args]) do deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) time_deadline = NaiveDateTime.utc_now() |> NaiveDateTime.add(-(deadline * 86_400)) + limit_cnt = Keyword.get(options, :limit, 0) + log_message = "Pruning objects older than #{deadline} days" log_message = @@ -188,6 +199,13 @@ def run(["prune_objects" | args]) do log_message end + log_message = + if limit_cnt > 0 do + log_message <> ", limiting to #{limit_cnt} rows" + else + log_message + end + Logger.info(log_message) if Keyword.get(options, :keep_threads) do @@ -221,31 +239,38 @@ def run(["prune_objects" | args]) do |> having([a], max(a.updated_at) < ^time_deadline) |> having([a], not fragment("bool_or(?)", a.local)) |> having([_, b], fragment("max(?::text) is null", b.id)) + |> maybe_limit(limit_cnt) |> select([a], fragment("? ->> 'context'::text", a.data)) Pleroma.Object |> where([o], fragment("? 
->> 'context'::text", o.data) in subquery(deletable_context)) else - if Keyword.get(options, :keep_non_public) do - Pleroma.Object + deletable = + if Keyword.get(options, :keep_non_public) do + Pleroma.Object + |> where( + [o], + fragment( + "?->'to' \\? ? OR ?->'cc' \\? ?", + o.data, + ^Pleroma.Constants.as_public(), + o.data, + ^Pleroma.Constants.as_public() + ) + ) + else + Pleroma.Object + end + |> where([o], o.updated_at < ^time_deadline) |> where( [o], - fragment( - "?->'to' \\? ? OR ?->'cc' \\? ?", - o.data, - ^Pleroma.Constants.as_public(), - o.data, - ^Pleroma.Constants.as_public() - ) + fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host()) ) - else - Pleroma.Object - end - |> where([o], o.updated_at < ^time_deadline) - |> where( - [o], - fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host()) - ) + |> maybe_limit(limit_cnt) + |> select([o], o.id) + + Pleroma.Object + |> where([o], o.id in subquery(deletable)) end |> Repo.delete_all(timeout: :infinity) -- 2.43.0 From 6e7cbf1885359a028231ec31ca65633c02ac7ba0 Mon Sep 17 00:00:00 2001 From: Oneric Date: Sat, 10 Feb 2024 03:03:13 +0100 Subject: [PATCH 05/11] Test both standalone and flag mode for pruning orphaned activities --- test/mix/tasks/pleroma/database_test.exs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/mix/tasks/pleroma/database_test.exs b/test/mix/tasks/pleroma/database_test.exs index f28bef51c..18b4752bf 100644 --- a/test/mix/tasks/pleroma/database_test.exs +++ b/test/mix/tasks/pleroma/database_test.exs @@ -470,7 +470,7 @@ test "it prunes orphaned activities with the --prune-orphaned-activities" do assert length(activities) == 4 end - test "it prunes orphaned activities with the --prune-orphaned-activities when the objects are referenced from an array" do + test "it prunes orphaned activities with prune_orphaned_activities when the objects are referenced from an array" do %Object{} |> Map.merge(%{data: 
%{"id" => "existing_object"}}) |> Repo.insert() %User{} |> Map.merge(%{ap_id: "existing_actor"}) |> Repo.insert() @@ -517,7 +517,7 @@ test "it prunes orphaned activities with the --prune-orphaned-activities when th assert length(Repo.all(Activity)) == 4 Mix.Tasks.Pleroma.Database.run(["prune_objects"]) assert length(Repo.all(Activity)) == 4 - Mix.Tasks.Pleroma.Database.run(["prune_objects", "--prune-orphaned-activities"]) + Mix.Tasks.Pleroma.Database.run(["prune_orphaned_activities"]) activities = Repo.all(Activity) assert length(activities) == 3 -- 2.43.0 From 1d4c212441d5162c05b4968ff9c64ee0219bf44b Mon Sep 17 00:00:00 2001 From: Oneric Date: Wed, 15 May 2024 01:20:27 +0200 Subject: [PATCH 06/11] dbprune: shortcut array activity search This brought down query costs from 7,953,740.90 to 47,600.97 --- lib/mix/tasks/pleroma/database.ex | 16 +++++++++++++++- test/mix/tasks/pleroma/database_test.exs | 4 ++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index b8f19551a..ac0f15a33 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -36,6 +36,19 @@ def prune_orphaned_activities(limit \\ 0) when is_number(limit) do "" end + # Activities can either refer to a single object id, and array of object ids + # or contain an inlined object (at least after going through our normalisation) + # + # Flag is the only type we support with an array (and always has arrays). + # Update the only one with inlined objects. + # + # We already regularly purge old Delete, Undo, Update and Remove and if + # rejected Follow requests anyway; no need to explicitly deal with those here. + # + # Since there’s an index on types and there are typically only few Flag + # activites, it’s _much_ faster to utilise the index. To avoid accidentally + # deleting useful activities should more types be added, keep typeof for singles. 
+ # Prune activities who link to a single object {:ok, %{:num_rows => del_single}} = """ @@ -61,7 +74,8 @@ def prune_orphaned_activities(limit \\ 0) when is_number(limit) do delete from public.activities where id in ( select a.id from public.activities a - join json_array_elements_text((a."data" -> 'object')::json) as j on jsonb_typeof(a."data" -> 'object') = 'array' + join json_array_elements_text((a."data" -> 'object')::json) as j + on a.data->>'type' = 'Flag' left join public.objects o on j.value = o.data ->> 'id' left join public.activities a2 on j.value = a2.data ->> 'id' left join public.users u on j.value = u.ap_id diff --git a/test/mix/tasks/pleroma/database_test.exs b/test/mix/tasks/pleroma/database_test.exs index 18b4752bf..e92ffbf98 100644 --- a/test/mix/tasks/pleroma/database_test.exs +++ b/test/mix/tasks/pleroma/database_test.exs @@ -478,6 +478,7 @@ test "it prunes orphaned activities with prune_orphaned_activities when the obje |> Map.merge(%{ local: false, data: %{ + "type" => "Flag", "id" => "remote_activity_existing_object", "object" => ["non_ existing_object", "existing_object"] } @@ -488,6 +489,7 @@ test "it prunes orphaned activities with prune_orphaned_activities when the obje |> Map.merge(%{ local: false, data: %{ + "type" => "Flag", "id" => "remote_activity_existing_actor", "object" => ["non_ existing_object", "existing_actor"] } @@ -498,6 +500,7 @@ test "it prunes orphaned activities with prune_orphaned_activities when the obje |> Map.merge(%{ local: false, data: %{ + "type" => "Flag", "id" => "remote_activity_existing_activity", "object" => ["non_ existing_object", "remote_activity_existing_actor"] } @@ -508,6 +511,7 @@ test "it prunes orphaned activities with prune_orphaned_activities when the obje |> Map.merge(%{ local: false, data: %{ + "type" => "Flag", "id" => "remote_activity_without_existing_referenced_object", "object" => ["owo", "whats_this"] } -- 2.43.0 From 24bab63cd8a5b9fca70c9632776a196b74c1e352 Mon Sep 17 00:00:00 2001 From: 
Oneric Date: Wed, 15 May 2024 01:33:41 +0200 Subject: [PATCH 07/11] dbprune: add more logs Pruning can go on for a long time; give admins some insight that something is happening, to make it less frustrating and to make it easier to tell which part of the process is stalled should this happen. Again most of the changes are merely reindents; review with whitespace changes hidden recommended. --- lib/mix/tasks/pleroma/database.ex | 171 ++++++++++++++++-------------- 1 file changed, 94 insertions(+), 77 deletions(-) diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index ac0f15a33..8d3673c78 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -68,6 +68,8 @@ def prune_orphaned_activities(limit \\ 0) when is_number(limit) do """ |> Repo.query([], timeout: :infinity) + Logger.info("Prune activity singles: deleted #{del_single} rows...") + # Prune activities who link to an array of objects {:ok, %{:num_rows => del_array}} = """ @@ -88,6 +90,8 @@ def prune_orphaned_activities(limit \\ 0) when is_number(limit) do """ |> Repo.query([], timeout: :infinity) + Logger.info("Prune activity arrays: deleted #{del_array} rows...") + del_single + del_array end @@ -222,102 +226,115 @@ def run(["prune_objects" | args]) do Logger.info(log_message) - if Keyword.get(options, :keep_threads) do - # We want to delete objects from threads where - # 1. the newest post is still old - # 2. none of the activities is local - # 3. none of the activities is bookmarked - # 4. optionally none of the posts is non-public - deletable_context = - if Keyword.get(options, :keep_non_public) do - Pleroma.Activity - |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id) - |> group_by([a], fragment("? ->> 'context'::text", a.data)) - |> having( - [a], - not fragment( - # Posts (checked on Create Activity) is non-public - "bool_or((not(?->'to' \\? ? OR ?->'cc' \\? ?)) and ? 
->> 'type' = 'Create')", - a.data, - ^Pleroma.Constants.as_public(), - a.data, - ^Pleroma.Constants.as_public(), - a.data + {del_obj, _} = + if Keyword.get(options, :keep_threads) do + # We want to delete objects from threads where + # 1. the newest post is still old + # 2. none of the activities is local + # 3. none of the activities is bookmarked + # 4. optionally none of the posts is non-public + deletable_context = + if Keyword.get(options, :keep_non_public) do + Pleroma.Activity + |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id) + |> group_by([a], fragment("? ->> 'context'::text", a.data)) + |> having( + [a], + not fragment( + # Posts (checked on Create Activity) is non-public + "bool_or((not(?->'to' \\? ? OR ?->'cc' \\? ?)) and ? ->> 'type' = 'Create')", + a.data, + ^Pleroma.Constants.as_public(), + a.data, + ^Pleroma.Constants.as_public(), + a.data + ) ) - ) - else - Pleroma.Activity - |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id) - |> group_by([a], fragment("? ->> 'context'::text", a.data)) - end - |> having([a], max(a.updated_at) < ^time_deadline) - |> having([a], not fragment("bool_or(?)", a.local)) - |> having([_, b], fragment("max(?::text) is null", b.id)) - |> maybe_limit(limit_cnt) - |> select([a], fragment("? ->> 'context'::text", a.data)) + else + Pleroma.Activity + |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id) + |> group_by([a], fragment("? ->> 'context'::text", a.data)) + end + |> having([a], max(a.updated_at) < ^time_deadline) + |> having([a], not fragment("bool_or(?)", a.local)) + |> having([_, b], fragment("max(?::text) is null", b.id)) + |> maybe_limit(limit_cnt) + |> select([a], fragment("? ->> 'context'::text", a.data)) - Pleroma.Object - |> where([o], fragment("? ->> 'context'::text", o.data) in subquery(deletable_context)) - else - deletable = - if Keyword.get(options, :keep_non_public) do - Pleroma.Object + Pleroma.Object + |> where([o], fragment("? 
->> 'context'::text", o.data) in subquery(deletable_context)) + else + deletable = + if Keyword.get(options, :keep_non_public) do + Pleroma.Object + |> where( + [o], + fragment( + "?->'to' \\? ? OR ?->'cc' \\? ?", + o.data, + ^Pleroma.Constants.as_public(), + o.data, + ^Pleroma.Constants.as_public() + ) + ) + else + Pleroma.Object + end + |> where([o], o.updated_at < ^time_deadline) |> where( [o], - fragment( - "?->'to' \\? ? OR ?->'cc' \\? ?", - o.data, - ^Pleroma.Constants.as_public(), - o.data, - ^Pleroma.Constants.as_public() - ) + fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host()) ) - else - Pleroma.Object - end - |> where([o], o.updated_at < ^time_deadline) - |> where( - [o], - fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host()) - ) - |> maybe_limit(limit_cnt) - |> select([o], o.id) + |> maybe_limit(limit_cnt) + |> select([o], o.id) - Pleroma.Object - |> where([o], o.id in subquery(deletable)) - end - |> Repo.delete_all(timeout: :infinity) + Pleroma.Object + |> where([o], o.id in subquery(deletable)) + end + |> Repo.delete_all(timeout: :infinity) + + Logger.info("Deleted #{del_obj} objects...") if !Keyword.get(options, :keep_threads) do # Without the --keep-threads option, it's possible that bookmarked # objects have been deleted. We remove the corresponding bookmarks. 
- """ - delete from public.bookmarks - where id in ( - select b.id from public.bookmarks b - left join public.activities a on b.activity_id = a.id - left join public.objects o on a."data" ->> 'object' = o.data ->> 'id' - where o.id is null - ) - """ - |> Repo.query([], timeout: :infinity) + {:ok, %{:num_rows => del_bookmarks}} = + """ + delete from public.bookmarks + where id in ( + select b.id from public.bookmarks b + left join public.activities a on b.activity_id = a.id + left join public.objects o on a."data" ->> 'object' = o.data ->> 'id' + where o.id is null + ) + """ + |> Repo.query([], timeout: :infinity) + + Logger.info("Deleted #{del_bookmarks} orphaned bookmarks...") end if Keyword.get(options, :prune_orphaned_activities) do - prune_orphaned_activities() + del_activities = prune_orphaned_activities() + Logger.info("Deleted #{del_activities} orphaned activities...") end - """ - DELETE FROM hashtags AS ht - WHERE NOT EXISTS ( - SELECT 1 FROM hashtags_objects hto - WHERE ht.id = hto.hashtag_id) - """ - |> Repo.query() + {:ok, %{:num_rows => del_hashtags}} = + """ + DELETE FROM hashtags AS ht + WHERE NOT EXISTS ( + SELECT 1 FROM hashtags_objects hto + WHERE ht.id = hto.hashtag_id) + """ + |> Repo.query() + + Logger.info("Deleted #{del_hashtags} no longer used hashtags...") if Keyword.get(options, :vacuum) do + Logger.info("Starting vacuum...") Maintenance.vacuum("full") end + + Logger.info("All done!") end def run(["prune_task"]) do -- 2.43.0 From 5751637926df143ff6ff9d3b6d203f9693f1e774 Mon Sep 17 00:00:00 2001 From: Oneric Date: Wed, 15 May 2024 01:38:59 +0200 Subject: [PATCH 08/11] dbprune: use query! 
--- lib/mix/tasks/pleroma/database.ex | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index 8d3673c78..b4709fa3a 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -50,7 +50,7 @@ def prune_orphaned_activities(limit \\ 0) when is_number(limit) do # deleting useful activities should more types be added, keep typeof for singles. # Prune activities who link to a single object - {:ok, %{:num_rows => del_single}} = + %{:num_rows => del_single} = """ delete from public.activities where id in ( @@ -66,12 +66,12 @@ def prune_orphaned_activities(limit \\ 0) when is_number(limit) do #{limit_arg} ) """ - |> Repo.query([], timeout: :infinity) + |> Repo.query!([], timeout: :infinity) Logger.info("Prune activity singles: deleted #{del_single} rows...") # Prune activities who link to an array of objects - {:ok, %{:num_rows => del_array}} = + %{:num_rows => del_array} = """ delete from public.activities where id in ( @@ -88,7 +88,7 @@ def prune_orphaned_activities(limit \\ 0) when is_number(limit) do #{limit_arg} ) """ - |> Repo.query([], timeout: :infinity) + |> Repo.query!([], timeout: :infinity) Logger.info("Prune activity arrays: deleted #{del_array} rows...") @@ -298,7 +298,7 @@ def run(["prune_objects" | args]) do if !Keyword.get(options, :keep_threads) do # Without the --keep-threads option, it's possible that bookmarked # objects have been deleted. We remove the corresponding bookmarks. 
- {:ok, %{:num_rows => del_bookmarks}} = + %{:num_rows => del_bookmarks} = """ delete from public.bookmarks where id in ( @@ -308,7 +308,7 @@ def run(["prune_objects" | args]) do where o.id is null ) """ - |> Repo.query([], timeout: :infinity) + |> Repo.query!([], timeout: :infinity) Logger.info("Deleted #{del_bookmarks} orphaned bookmarks...") end @@ -318,14 +318,14 @@ def run(["prune_objects" | args]) do Logger.info("Deleted #{del_activities} orphaned activities...") end - {:ok, %{:num_rows => del_hashtags}} = + %{:num_rows => del_hashtags} = """ DELETE FROM hashtags AS ht WHERE NOT EXISTS ( SELECT 1 FROM hashtags_objects hto WHERE ht.id = hto.hashtag_id) """ - |> Repo.query() + |> Repo.query!() Logger.info("Deleted #{del_hashtags} no longer used hashtags...") -- 2.43.0 From aeaebb566c9e9e5b34075b2700a5fb4bd0d0e3c9 Mon Sep 17 00:00:00 2001 From: Oneric Date: Wed, 15 May 2024 02:15:31 +0200 Subject: [PATCH 09/11] dbprune: allow splitting array and single activity prunes The former is typically just a few reports; it doesn't make sense to rerun it over and over again in batched prunes or if a full prune OOMed. --- .../docs/administration/CLI_tasks/database.md | 2 + lib/mix/tasks/pleroma/database.ex | 77 ++++++++++++------- 2 files changed, 52 insertions(+), 27 deletions(-) diff --git a/docs/docs/administration/CLI_tasks/database.md b/docs/docs/administration/CLI_tasks/database.md index bbf29fc60..580c9d32b 100644 --- a/docs/docs/administration/CLI_tasks/database.md +++ b/docs/docs/administration/CLI_tasks/database.md @@ -80,6 +80,8 @@ when all orphaned activities have been deleted. ### Options - `--limit n` - Only delete up to `n` activities in each query making up this job, i.e. if this job runs two queries at most `2n` activities will be deleted. Running this task repeatedly in limited batches can help maintain the instance’s responsiveness while still freeing up some space. 
+- `--no-singles` - Do not delete activities referencing single objects +- `--no-arrays` - Do not delete activities referencing an array of objects ## Create a conversation for all existing DMs diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index b4709fa3a..8bf4b38ca 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -28,28 +28,15 @@ defp maybe_limit(query, limit_cnt) do end end - def prune_orphaned_activities(limit \\ 0) when is_number(limit) do - limit_arg = - if limit > 0 do - "LIMIT #{limit}" - else - "" - end + defp limit_statement(limit) when is_number(limit) do + if limit > 0 do + "LIMIT #{limit}" + else + "" + end + end - # Activities can either refer to a single object id, and array of object ids - # or contain an inlined object (at least after going through our normalisation) - # - # Flag is the only type we support with an array (and always has arrays). - # Update the only one with inlined objects. - # - # We already regularly purge old Delete, Undo, Update and Remove and if - # rejected Follow requests anyway; no need to explicitly deal with those here. - # - # Since there’s an index on types and there are typically only few Flag - # activites, it’s _much_ faster to utilise the index. To avoid accidentally - # deleting useful activities should more types be added, keep typeof for singles. 
- - # Prune activities who link to a single object + defp prune_orphaned_activities_singles(limit) do %{:num_rows => del_single} = """ delete from public.activities @@ -63,14 +50,16 @@ def prune_orphaned_activities(limit \\ 0) when is_number(limit) do and o.id is null and a2.id is null and u.id is null - #{limit_arg} + #{limit_statement(limit)} ) """ |> Repo.query!([], timeout: :infinity) Logger.info("Prune activity singles: deleted #{del_single} rows...") + del_single + end - # Prune activities who link to an array of objects + defp prune_orphaned_activities_array(limit) do %{:num_rows => del_array} = """ delete from public.activities @@ -85,12 +74,44 @@ def prune_orphaned_activities(limit \\ 0) when is_number(limit) do having max(o.data ->> 'id') is null and max(a2.data ->> 'id') is null and max(u.ap_id) is null - #{limit_arg} + #{limit_statement(limit)} ) """ |> Repo.query!([], timeout: :infinity) Logger.info("Prune activity arrays: deleted #{del_array} rows...") + del_array + end + + def prune_orphaned_activities(limit \\ 0, opts \\ []) when is_number(limit) do + # Activities can either refer to a single object id, and array of object ids + # or contain an inlined object (at least after going through our normalisation) + # + # Flag is the only type we support with an array (and always has arrays). + # Update the only one with inlined objects. + # + # We already regularly purge old Delete, Undo, Update and Remove and if + # rejected Follow requests anyway; no need to explicitly deal with those here. + # + # Since there’s an index on types and there are typically only few Flag + # activites, it’s _much_ faster to utilise the index. To avoid accidentally + # deleting useful activities should more types be added, keep typeof for singles. 
+ # Prune activities who link to a single object + del_single = + if Keyword.get(opts, :singles, true) do + prune_orphaned_activities_singles(limit) + else + 0 + end + + # Prune activities who link to an array of objects + del_array = + if Keyword.get(opts, :arrays, true) do + prune_orphaned_activities_array(limit) + else + 0 + end del_single + del_array end @@ -142,13 +163,15 @@ def run(["prune_orphaned_activities" | args]) do OptionParser.parse( args, strict: [ - limit: :integer + limit: :integer, + singles: :boolean, + arrays: :boolean ] ) start_pleroma() - limit = Keyword.get(options, :limit, 0) + {limit, options} = Keyword.pop(options, :limit, 0) log_message = "Pruning orphaned activities" @@ -161,7 +184,7 @@ def run(["prune_orphaned_activities" | args]) do Logger.info(log_message) - deleted = prune_orphaned_activities(limit) + deleted = prune_orphaned_activities(limit, options) Logger.info("Deleted #{deleted} rows") end -- 2.43.0 From 70cd5f91d8667269a5fd17df4cd9368156286562 Mon Sep 17 00:00:00 2001 From: Oneric Date: Wed, 15 May 2024 02:17:34 +0200 Subject: [PATCH 10/11] dbprune/activities: prune array activities first This query is less costly; if something goes wrong or gets aborted later at least this part will already be done. --- lib/mix/tasks/pleroma/database.ex | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index 8bf4b38ca..87ccfdff1 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -97,14 +97,6 @@ def prune_orphaned_activities(limit \\ 0, opts \\ []) when is_number(limit) do # activites, it’s _much_ faster to utilise the index. To avoid accidentally # deleting useful activities should more types be added, keep typeof for singles. 
- # Prune activities who link to a single object - del_single = - if Keyword.get(opts, :singles, true) do - prune_orphaned_activities_singles(limit) - else - 0 - end - # Prune activities who link to an array of objects del_array = if Keyword.get(opts, :arrays, true) do @@ -113,6 +105,14 @@ def prune_orphaned_activities(limit \\ 0, opts \\ []) when is_number(limit) do 0 end + # Prune activities who link to a single object + del_single = + if Keyword.get(opts, :singles, true) do + prune_orphaned_activities_singles(limit) + else + 0 + end + del_single + del_array end -- 2.43.0 From bed7ff8e890b7a3770df6fcab105b172d585d797 Mon Sep 17 00:00:00 2001 From: Oneric Date: Thu, 30 May 2024 01:15:04 +0000 Subject: [PATCH 11/11] mix: consistently use shell_info and shell_error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Logger output being visible depends on user configuration, but most of the prints in mix tasks should always be shown. When running inside a mix shell, it’s probably preferable to send output directly to it rather than using raw IO.puts and we already have shell_* functions for this, let’s use them everywhere. 
--- lib/mix/pleroma.ex | 12 +++++- lib/mix/tasks/pleroma/activity.ex | 5 +-- lib/mix/tasks/pleroma/diagnostics.ex | 15 ++++--- lib/mix/tasks/pleroma/emoji.ex | 32 +++++++-------- .../tasks/pleroma/refresh_counter_cache.ex | 1 - lib/mix/tasks/pleroma/search/meilisearch.ex | 18 ++++----- lib/mix/tasks/pleroma/security.ex | 40 +++++++++---------- lib/mix/tasks/pleroma/user.ex | 16 ++++---- test/mix/tasks/pleroma/user_test.exs | 14 ++++--- 9 files changed, 80 insertions(+), 73 deletions(-) diff --git a/lib/mix/pleroma.ex b/lib/mix/pleroma.ex index 52c651dc5..c6d7f5521 100644 --- a/lib/mix/pleroma.ex +++ b/lib/mix/pleroma.ex @@ -112,18 +112,26 @@ def shell_prompt(prompt, defval \\ nil, defname \\ nil) do end end - def shell_info(message) do + def shell_info(message) when is_binary(message) or is_list(message) do if mix_shell?(), do: Mix.shell().info(message), else: IO.puts(message) end - def shell_error(message) do + def shell_info(message) do + shell_info("#{inspect(message)}") + end + + def shell_error(message) when is_binary(message) or is_list(message) do if mix_shell?(), do: Mix.shell().error(message), else: IO.puts(:stderr, message) end + def shell_error(message) do + shell_error("#{inspect(message)}") + end + @doc "Performs a safe check whether `Mix.shell/0` is available (does not raise if Mix is not loaded)" def mix_shell?, do: :erlang.function_exported(Mix, :shell, 0) diff --git a/lib/mix/tasks/pleroma/activity.ex b/lib/mix/tasks/pleroma/activity.ex index 84b9c16f9..ad25c3ee9 100644 --- a/lib/mix/tasks/pleroma/activity.ex +++ b/lib/mix/tasks/pleroma/activity.ex @@ -8,7 +8,6 @@ defmodule Mix.Tasks.Pleroma.Activity do alias Pleroma.User alias Pleroma.Web.CommonAPI alias Pleroma.Pagination - require Logger import Mix.Pleroma import Ecto.Query @@ -17,7 +16,7 @@ def run(["get", id | _rest]) do id |> Activity.get_by_id() - |> IO.inspect() + |> shell_info() end def run(["delete_by_keyword", user, keyword | _rest]) do @@ -35,7 +34,7 @@ def run(["delete_by_keyword", 
user, keyword | _rest]) do ) |> Enum.map(fn x -> CommonAPI.delete(x.id, u) end) |> Enum.count() - |> IO.puts() + |> shell_info() end defp query_with(q, search_query) do diff --git a/lib/mix/tasks/pleroma/diagnostics.ex b/lib/mix/tasks/pleroma/diagnostics.ex index ab7def110..7b72c6768 100644 --- a/lib/mix/tasks/pleroma/diagnostics.ex +++ b/lib/mix/tasks/pleroma/diagnostics.ex @@ -3,7 +3,6 @@ defmodule Mix.Tasks.Pleroma.Diagnostics do alias Pleroma.Repo alias Pleroma.User - require Logger require Pleroma.Constants import Mix.Pleroma @@ -14,7 +13,7 @@ def run(["http", url]) do start_pleroma() Pleroma.HTTP.get(url) - |> IO.inspect() + |> shell_info() end def run(["fetch_object", url]) do @@ -27,7 +26,7 @@ def run(["fetch_object", url]) do def run(["home_timeline", nickname]) do start_pleroma() user = Repo.get_by!(User, nickname: nickname) - Logger.info("Home timeline query #{user.nickname}") + shell_info("Home timeline query #{user.nickname}") followed_hashtags = user @@ -56,14 +55,14 @@ def run(["home_timeline", nickname]) do |> limit(20) Ecto.Adapters.SQL.explain(Repo, :all, query, analyze: true, timeout: :infinity) - |> IO.puts() + |> shell_info() end def run(["user_timeline", nickname, reading_nickname]) do start_pleroma() user = Repo.get_by!(User, nickname: nickname) reading_user = Repo.get_by!(User, nickname: reading_nickname) - Logger.info("User timeline query #{user.nickname}") + shell_info("User timeline query #{user.nickname}") params = %{limit: 20} @@ -87,7 +86,7 @@ def run(["user_timeline", nickname, reading_nickname]) do |> limit(20) Ecto.Adapters.SQL.explain(Repo, :all, query, analyze: true, timeout: :infinity) - |> IO.puts() + |> shell_info() end def run(["notifications", nickname]) do @@ -103,7 +102,7 @@ def run(["notifications", nickname]) do |> limit(20) Ecto.Adapters.SQL.explain(Repo, :all, query, analyze: true, timeout: :infinity) - |> IO.puts() + |> shell_info() end def run(["known_network", nickname]) do @@ -129,6 +128,6 @@ def 
run(["known_network", nickname]) do |> limit(20) Ecto.Adapters.SQL.explain(Repo, :all, query, analyze: true, timeout: :infinity) - |> IO.puts() + |> shell_info() end end diff --git a/lib/mix/tasks/pleroma/emoji.ex b/lib/mix/tasks/pleroma/emoji.ex index 8dda1512d..12918dfff 100644 --- a/lib/mix/tasks/pleroma/emoji.ex +++ b/lib/mix/tasks/pleroma/emoji.ex @@ -27,11 +27,11 @@ def run(["ls-packs" | args]) do ] for {param, value} <- to_print do - IO.puts(IO.ANSI.format([:bright, param, :normal, ": ", value])) + shell_info(IO.ANSI.format([:bright, param, :normal, ": ", value])) end # A newline - IO.puts("") + shell_info("") end) end @@ -49,7 +49,7 @@ def run(["get-packs" | args]) do pack = manifest[pack_name] src = pack["src"] - IO.puts( + shell_info( IO.ANSI.format([ "Downloading ", :bright, @@ -67,9 +67,9 @@ def run(["get-packs" | args]) do sha_status_text = ["SHA256 of ", :bright, pack_name, :normal, " source file is ", :bright] if archive_sha == String.upcase(pack["src_sha256"]) do - IO.puts(IO.ANSI.format(sha_status_text ++ [:green, "OK"])) + shell_info(IO.ANSI.format(sha_status_text ++ [:green, "OK"])) else - IO.puts(IO.ANSI.format(sha_status_text ++ [:red, "BAD"])) + shell_info(IO.ANSI.format(sha_status_text ++ [:red, "BAD"])) raise "Bad SHA256 for #{pack_name}" end @@ -80,7 +80,7 @@ def run(["get-packs" | args]) do |> Path.dirname() |> Path.join(pack["files"]) - IO.puts( + shell_info( IO.ANSI.format([ "Fetching the file list for ", :bright, @@ -94,7 +94,7 @@ def run(["get-packs" | args]) do files = fetch_and_decode!(files_loc) - IO.puts(IO.ANSI.format(["Unpacking ", :bright, pack_name])) + shell_info(IO.ANSI.format(["Unpacking ", :bright, pack_name])) pack_path = Path.join([ @@ -115,7 +115,7 @@ def run(["get-packs" | args]) do file_list: files_to_unzip ) - IO.puts(IO.ANSI.format(["Writing pack.json for ", :bright, pack_name])) + shell_info(IO.ANSI.format(["Writing pack.json for ", :bright, pack_name])) pack_json = %{ pack: %{ @@ -132,7 +132,7 @@ def 
run(["get-packs" | args]) do File.write!(Path.join(pack_path, "pack.json"), Jason.encode!(pack_json, pretty: true)) Pleroma.Emoji.reload() else - IO.puts(IO.ANSI.format([:bright, :red, "No pack named \"#{pack_name}\" found"])) + shell_info(IO.ANSI.format([:bright, :red, "No pack named \"#{pack_name}\" found"])) end end end @@ -180,14 +180,14 @@ def run(["gen-pack" | args]) do custom_exts end - IO.puts("Using #{Enum.join(exts, " ")} extensions") + shell_info("Using #{Enum.join(exts, " ")} extensions") - IO.puts("Downloading the pack and generating SHA256") + shell_info("Downloading the pack and generating SHA256") {:ok, %{body: binary_archive}} = Pleroma.HTTP.get(src) archive_sha = :crypto.hash(:sha256, binary_archive) |> Base.encode16() - IO.puts("SHA256 is #{archive_sha}") + shell_info("SHA256 is #{archive_sha}") pack_json = %{ name => %{ @@ -208,7 +208,7 @@ def run(["gen-pack" | args]) do File.write!(files_name, Jason.encode!(emoji_map, pretty: true)) - IO.puts(""" + shell_info(""" #{files_name} has been created and contains the list of all found emojis in the pack. Please review the files in the pack and remove those not needed. 
@@ -230,11 +230,11 @@ def run(["gen-pack" | args]) do ) ) - IO.puts("#{pack_file} has been updated with the #{name} pack") + shell_info("#{pack_file} has been updated with the #{name} pack") else File.write!(pack_file, Jason.encode!(pack_json, pretty: true)) - IO.puts("#{pack_file} has been created with the #{name} pack") + shell_info("#{pack_file} has been created with the #{name} pack") end Pleroma.Emoji.reload() @@ -243,7 +243,7 @@ def run(["gen-pack" | args]) do def run(["reload"]) do start_pleroma() Pleroma.Emoji.reload() - IO.puts("Emoji packs have been reloaded.") + shell_info("Emoji packs have been reloaded.") end defp fetch_and_decode!(from) do diff --git a/lib/mix/tasks/pleroma/refresh_counter_cache.ex b/lib/mix/tasks/pleroma/refresh_counter_cache.ex index 66eed8657..58384cf63 100644 --- a/lib/mix/tasks/pleroma/refresh_counter_cache.ex +++ b/lib/mix/tasks/pleroma/refresh_counter_cache.ex @@ -11,7 +11,6 @@ defmodule Mix.Tasks.Pleroma.RefreshCounterCache do alias Pleroma.CounterCache alias Pleroma.Repo - require Logger import Ecto.Query def run([]) do diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index e4dc616b4..f78a190aa 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -48,7 +48,7 @@ def run(["index"]) do ] ) - IO.puts("Created indices. Starting to insert posts.") + shell_info("Created indices. 
Starting to insert posts.") chunk_size = Pleroma.Config.get([Pleroma.Search.Meilisearch, :initial_indexing_chunk_size]) @@ -65,7 +65,7 @@ def run(["index"]) do ) count = query |> Pleroma.Repo.aggregate(:count, :data) - IO.puts("Entries to index: #{count}") + shell_info("Entries to index: #{count}") Pleroma.Repo.stream( query, @@ -92,10 +92,10 @@ def run(["index"]) do with {:ok, res} <- result do if not Map.has_key?(res, "indexUid") do - IO.puts("\nFailed to index: #{inspect(result)}") + shell_info("\nFailed to index: #{inspect(result)}") end else - e -> IO.puts("\nFailed to index due to network error: #{inspect(e)}") + e -> shell_error("\nFailed to index due to network error: #{inspect(e)}") end end) |> Stream.run() @@ -128,13 +128,13 @@ def run(["show-keys", master_key]) do if decoded["results"] do Enum.each(decoded["results"], fn %{"name" => name, "key" => key} -> - IO.puts("#{name}: #{key}") + shell_info("#{name}: #{key}") %{"description" => desc, "key" => key} -> - IO.puts("#{desc}: #{key}") + shell_info("#{desc}: #{key}") end) else - IO.puts("Error fetching the keys, check the master key is correct: #{inspect(decoded)}") + shell_error("Error fetching the keys, check the master key is correct: #{inspect(decoded)}") end end @@ -142,7 +142,7 @@ def run(["stats"]) do start_pleroma() {:ok, result} = meili_get("/indexes/objects/stats") - IO.puts("Number of entries: #{result["numberOfDocuments"]}") - IO.puts("Indexing? #{result["isIndexing"]}") + shell_info("Number of entries: #{result["numberOfDocuments"]}") + shell_info("Indexing? 
#{result["isIndexing"]}") end end diff --git a/lib/mix/tasks/pleroma/security.ex b/lib/mix/tasks/pleroma/security.ex index f039e0980..3af0ce8bf 100644 --- a/lib/mix/tasks/pleroma/security.ex +++ b/lib/mix/tasks/pleroma/security.ex @@ -38,7 +38,7 @@ def run(["spoof-uploaded"]) do Logger.put_process_level(self(), :notice) start_pleroma() - IO.puts(""" + shell_info(""" +------------------------+ | SPOOF SEARCH UPLOADS | +------------------------+ @@ -55,7 +55,7 @@ def run(["spoof-inserted"]) do Logger.put_process_level(self(), :notice) start_pleroma() - IO.puts(""" + shell_info(""" +----------------------+ | SPOOF SEARCH NOTES | +----------------------+ @@ -77,7 +77,7 @@ defp do_spoof_uploaded() do uploads_search_spoofs_local_dir(Config.get!([Pleroma.Uploaders.Local, :uploads])) _ -> - IO.puts(""" + shell_info(""" NOTE: Not using local uploader; thus not affected by this exploit. It's impossible to check for files, but in case local uploader was used before @@ -98,13 +98,13 @@ defp do_spoof_uploaded() do orphaned_attachs = upload_search_orphaned_attachments(not_orphaned_urls) - IO.puts("\nSearch concluded; here are the results:") + shell_info("\nSearch concluded; here are the results:") pretty_print_list_with_title(emoji, "Emoji") pretty_print_list_with_title(files, "Uploaded Files") pretty_print_list_with_title(post_attachs, "(Not Deleted) Post Attachments") pretty_print_list_with_title(orphaned_attachs, "Orphaned Uploads") - IO.puts(""" + shell_info(""" In total found #{length(emoji)} emoji #{length(files)} uploads @@ -116,7 +116,7 @@ defp do_spoof_uploaded() do defp uploads_search_spoofs_local_dir(dir) do local_dir = String.replace_suffix(dir, "/", "") - IO.puts("Searching for suspicious files in #{local_dir}...") + shell_info("Searching for suspicious files in #{local_dir}...") glob_ext = "{" <> Enum.join(@activity_exts, ",") <> "}" @@ -128,7 +128,7 @@ defp uploads_search_spoofs_local_dir(dir) do end defp uploads_search_spoofs_notes() do - IO.puts("Now querying DB 
for posts with spoofing attachments. This might take a while...") + shell_info("Now querying DB for posts with spoofing attachments. This might take a while...") patterns = [local_id_pattern() | activity_ext_url_patterns()] @@ -153,7 +153,7 @@ defp uploads_search_spoofs_notes() do end defp upload_search_orphaned_attachments(not_orphaned_urls) do - IO.puts(""" + shell_info(""" Now querying DB for orphaned spoofing attachment (i.e. their post was deleted, but if :cleanup_attachments was not enabled traces remain in the database) This might take a bit... @@ -184,7 +184,7 @@ defp upload_search_orphaned_attachments(not_orphaned_urls) do # | S P O O F - I N S E R T E D | # +-----------------------------+ defp do_spoof_inserted() do - IO.puts(""" + shell_info(""" Searching for local posts whose Create activity has no ActivityPub id... This is a pretty good indicator, but only for spoofs of local actors and only if the spoofing happened after around late 2021. @@ -194,9 +194,9 @@ defp do_spoof_inserted() do search_local_notes_without_create_id() |> Enum.sort() - IO.puts("Done.\n") + shell_info("Done.\n") - IO.puts(""" + shell_info(""" Now trying to weed out other poorly hidden spoofs. This can't detect all and may have some false positives. """) @@ -207,9 +207,9 @@ defp do_spoof_inserted() do search_sus_notes_by_id_patterns() |> Enum.filter(fn r -> !(r in likely_spoofed_posts_set) end) - IO.puts("Done.\n") + shell_info("Done.\n") - IO.puts(""" + shell_info(""" Finally, searching for spoofed, local user accounts. 
(It's impossible to detect spoofed remote users) """) @@ -220,7 +220,7 @@ defp do_spoof_inserted() do pretty_print_list_with_title(idless_create, "Likely Spoofed Posts") pretty_print_list_with_title(spoofed_users, "Spoofed local user accounts") - IO.puts(""" + shell_info(""" In total found: #{length(spoofed_users)} bogus users #{length(idless_create)} likely spoofed posts @@ -289,27 +289,27 @@ defp search_bogus_local_users() do defp pretty_print_list_with_title(list, title) do title_len = String.length(title) title_underline = String.duplicate("=", title_len) - IO.puts(title) - IO.puts(title_underline) + shell_info(title) + shell_info(title_underline) pretty_print_list(list) end - defp pretty_print_list([]), do: IO.puts("") + defp pretty_print_list([]), do: shell_info("") defp pretty_print_list([{a, o} | rest]) when (is_binary(a) or is_number(a)) and is_binary(o) do - IO.puts(" {#{a}, #{o}}") + shell_info(" {#{a}, #{o}}") pretty_print_list(rest) end defp pretty_print_list([{u, a, o} | rest]) when is_binary(a) and is_binary(u) and is_binary(o) do - IO.puts(" {#{u}, #{a}, #{o}}") + shell_info(" {#{u}, #{a}, #{o}}") pretty_print_list(rest) end defp pretty_print_list([e | rest]) when is_binary(e) do - IO.puts(" #{e}") + shell_info(" #{e}") pretty_print_list(rest) end diff --git a/lib/mix/tasks/pleroma/user.ex b/lib/mix/tasks/pleroma/user.ex index 1a8e866ef..8679e2b36 100644 --- a/lib/mix/tasks/pleroma/user.ex +++ b/lib/mix/tasks/pleroma/user.ex @@ -114,7 +114,7 @@ def run(["reset_password", nickname]) do {:ok, token} <- Pleroma.PasswordResetToken.create_token(user) do shell_info("Generated password reset token for #{user.nickname}") - IO.puts("URL: #{~p[/api/v1/pleroma/password_reset/#{token.token}]}") + shell_info("URL: #{~p[/api/v1/pleroma/password_reset/#{token.token}]}") else _ -> shell_error("No local user #{nickname}") @@ -301,7 +301,7 @@ def run(["invite" | rest]) do shell_info("Generated user invite token " <> String.replace(invite.invite_type, "_", " ")) url = 
url(~p[/registration/#{invite.token}]) - IO.puts(url) + shell_info(url) else error -> shell_error("Could not create invite token: #{inspect(error)}") @@ -373,7 +373,7 @@ def run(["show", nickname]) do nickname |> User.get_cached_by_nickname() - shell_info("#{inspect(user)}") + shell_info(user) end def run(["send_confirmation", nickname]) do @@ -457,7 +457,7 @@ def run(["blocking", nickname]) do with %User{local: true} = user <- User.get_cached_by_nickname(nickname) do blocks = User.following_ap_ids(user) - IO.puts("#{inspect(blocks)}") + shell_info(blocks) end end @@ -516,12 +516,12 @@ def run(["fix_follow_state", local_user, remote_user]) do {:follow_data, Pleroma.Web.ActivityPub.Utils.fetch_latest_follow(local, remote)} do calculated_state = User.following?(local, remote) - IO.puts( + shell_info( "Request state is #{request_state}, vs calculated state of following=#{calculated_state}" ) if calculated_state == false && request_state == "accept" do - IO.puts("Discrepancy found, fixing") + shell_info("Discrepancy found, fixing") Pleroma.Web.CommonAPI.reject_follow_request(local, remote) shell_info("Relationship fixed") else @@ -551,14 +551,14 @@ defp refetch_public_keys(query) do |> Stream.each(fn users -> users |> Enum.each(fn user -> - IO.puts("Re-Resolving: #{user.ap_id}") + shell_info("Re-Resolving: #{user.ap_id}") with {:ok, user} <- Pleroma.User.fetch_by_ap_id(user.ap_id), changeset <- Pleroma.User.update_changeset(user), {:ok, _user} <- Pleroma.User.update_and_set_cache(changeset) do :ok else - error -> IO.puts("Could not resolve: #{user.ap_id}, #{inspect(error)}") + error -> shell_info("Could not resolve: #{user.ap_id}, #{inspect(error)}") end end) end) diff --git a/test/mix/tasks/pleroma/user_test.exs b/test/mix/tasks/pleroma/user_test.exs index 414b87bec..021b2268f 100644 --- a/test/mix/tasks/pleroma/user_test.exs +++ b/test/mix/tasks/pleroma/user_test.exs @@ -280,12 +280,13 @@ test "no user to set status" do test "password reset token is generated" do 
user = insert(:user) - assert capture_io(fn -> - Mix.Tasks.Pleroma.User.run(["reset_password", user.nickname]) - end) =~ "URL:" + Mix.Tasks.Pleroma.User.run(["reset_password", user.nickname]) assert_receive {:mix_shell, :info, [message]} assert message =~ "Generated" + + assert_receive {:mix_shell, :info, [url]} + assert url =~ "URL:" end test "no user to reset password" do @@ -327,12 +328,13 @@ test "no user to reset MFA" do describe "running invite" do test "invite token is generated" do - assert capture_io(fn -> - Mix.Tasks.Pleroma.User.run(["invite"]) - end) =~ "http" + Mix.Tasks.Pleroma.User.run(["invite"]) assert_receive {:mix_shell, :info, [message]} assert message =~ "Generated user invite token one time" + + assert_receive {:mix_shell, :info, [invite_token]} + assert invite_token =~ "http" end test "token is generated with expires_at" do -- 2.43.0