[#3213] Refactoring of HashtagsTableMigrator. Hashtag timeline performance optimization (auto switch to non-aggregate join strategy when efficient).

This commit is contained in:
Ivan Tashkinov 2021-01-16 20:22:14 +03:00
parent f5f267fa76
commit 48b399cedb
6 changed files with 86 additions and 53 deletions

View file

@ -15,6 +15,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- Search: When using Postgres 11+, Pleroma will use the `websearch_to_tsvector` function to parse search queries. - Search: When using Postgres 11+, Pleroma will use the `websearch_to_tsvector` function to parse search queries.
- Emoji: Support the full Unicode 13.1 set of Emoji for reactions, plus regional indicators. - Emoji: Support the full Unicode 13.1 set of Emoji for reactions, plus regional indicators.
- Admin API: Reports now ordered by newest - Admin API: Reports now ordered by newest
- Extracted object hashtags into separate table in order to improve hashtag timeline performance (via background migration in `Pleroma.Migrators.HashtagsTableMigrator`).
### Added ### Added

View file

@ -941,6 +941,12 @@
key: :show_reactions, key: :show_reactions,
type: :boolean, type: :boolean,
description: "Let favourites and emoji reactions be viewed through the API." description: "Let favourites and emoji reactions be viewed through the API."
},
%{
key: :improved_hashtag_timeline,
type: :keyword,
description:
"If `true` / `:prefer_aggregation` / `:avoid_aggregation`, hashtags table and selected strategy will be used for hashtags timeline. When `false`, object-embedded hashtags will be used (slower). Is auto-set to `true` (unless overridden) when HashtagsTableMigrator completes."
} }
] ]
}, },

View file

@ -45,25 +45,23 @@ def init(_) do
def handle_continue(:init_state, _state) do def handle_continue(:init_state, _state) do
{:ok, _} = State.start_link(nil) {:ok, _} = State.start_link(nil)
put_stat(:status, :init) update_status(:init)
dm = data_migration() data_migration = data_migration()
manual_migrations = Config.get([:instance, :manual_data_migrations], []) manual_migrations = Config.get([:instance, :manual_data_migrations], [])
cond do cond do
Config.get(:env) == :test -> Config.get(:env) == :test ->
put_stat(:status, :noop) update_status(:noop)
is_nil(dm) -> is_nil(data_migration) ->
put_stat(:status, :halt) update_status(:halt, "Data migration does not exist.")
put_stat(:message, "Data migration does not exist.")
dm.state == :manual or dm.name in manual_migrations -> data_migration.state == :manual or data_migration.name in manual_migrations ->
put_stat(:status, :noop) update_status(:noop, "Data migration is in manual execution state.")
put_stat(:message, "Data migration is in manual execution state.")
dm.state == :complete -> data_migration.state == :complete ->
handle_success() handle_success(data_migration)
true -> true ->
send(self(), :migrate_hashtags) send(self(), :migrate_hashtags)
@ -81,7 +79,7 @@ def handle_info(:migrate_hashtags, state) do
{:ok, data_migration} = {:ok, data_migration} =
DataMigration.update(data_migration, %{state: :running, data: persistent_data}) DataMigration.update(data_migration, %{state: :running, data: persistent_data})
put_stat(:status, :running) update_status(:running)
Logger.info("Starting transferring object embedded hashtags to `hashtags` table...") Logger.info("Starting transferring object embedded hashtags to `hashtags` table...")
@ -146,13 +144,12 @@ def handle_info(:migrate_hashtags, state) do
) do ) do
_ = DataMigration.update_state(data_migration, :complete) _ = DataMigration.update_state(data_migration, :complete)
handle_success() handle_success(data_migration)
else else
_ -> _ ->
_ = DataMigration.update_state(data_migration, :failed) _ = DataMigration.update_state(data_migration, :failed)
put_stat(:status, :failed) update_status(:failed, "Please check data_migration_failed_ids records.")
put_stat(:message, "Please check data_migration_failed_ids records.")
end end
{:noreply, state} {:noreply, state}
@ -196,16 +193,25 @@ defp persist_stats(data_migration) do
_ = DataMigration.update(data_migration, %{data: runner_state}) _ = DataMigration.update(data_migration, %{data: runner_state})
end end
defp handle_success do defp handle_success(data_migration) do
put_stat(:status, :complete) update_status(:complete)
unless Config.improved_hashtag_timeline() do unless data_migration.feature_lock || Config.improved_hashtag_timeline() do
Config.put(Config.improved_hashtag_timeline_path(), true) Config.put(Config.improved_hashtag_timeline_path(), true)
end end
:ok :ok
end end
def failed_objects_query do
from(o in Object)
|> join(:inner, [o], dmf in fragment("SELECT * FROM data_migration_failed_ids"),
on: dmf.record_id == o.id
)
|> where([_o, dmf], dmf.data_migration_id == ^data_migration().id)
|> order_by([o], asc: o.id)
end
def force_continue do def force_continue do
send(whereis(), :migrate_hashtags) send(whereis(), :migrate_hashtags)
end end
@ -214,4 +220,9 @@ def force_restart do
{:ok, _} = DataMigration.update(data_migration(), %{state: :pending, data: %{}}) {:ok, _} = DataMigration.update(data_migration(), %{state: :pending, data: %{}})
force_continue() force_continue()
end end
defp update_status(status, message \\ nil) do
put_stat(:status, status)
put_stat(:message, message)
end
end end

View file

@ -2,23 +2,24 @@ defmodule Pleroma.Migrators.HashtagsTableMigrator.State do
use Agent use Agent
@init_state %{} @init_state %{}
@reg_name {:global, __MODULE__}
def start_link(_) do def start_link(_) do
Agent.start_link(fn -> @init_state end, name: __MODULE__) Agent.start_link(fn -> @init_state end, name: @reg_name)
end end
def get do def get do
Agent.get(__MODULE__, & &1) Agent.get(@reg_name, & &1)
end end
def put(key, value) do def put(key, value) do
Agent.update(__MODULE__, fn state -> Agent.update(@reg_name, fn state ->
Map.put(state, key, value) Map.put(state, key, value)
end) end)
end end
def increment(key, increment \\ 1) do def increment(key, increment \\ 1) do
Agent.update(__MODULE__, fn state -> Agent.update(@reg_name, fn state ->
updated_value = (state[key] || 0) + increment updated_value = (state[key] || 0) + increment
Map.put(state, key, updated_value) Map.put(state, key, updated_value)
end) end)

View file

@ -669,63 +669,66 @@ defp restrict_since(query, %{since_id: since_id}) do
defp restrict_since(query, _), do: query defp restrict_since(query, _), do: query
defp restrict_tag_reject(_query, %{tag_reject: _tag_reject, skip_preload: true}) do defp restrict_embedded_tag_reject(_query, %{tag_reject: _tag_reject, skip_preload: true}) do
raise_on_missing_preload() raise_on_missing_preload()
end end
defp restrict_tag_reject(query, %{tag_reject: tag_reject}) when is_list(tag_reject) do defp restrict_embedded_tag_reject(query, %{tag_reject: tag_reject}) when is_list(tag_reject) do
from( from(
[_activity, object] in query, [_activity, object] in query,
where: fragment("not (?)->'tag' \\?| (?)", object.data, ^tag_reject) where: fragment("not (?)->'tag' \\?| (?)", object.data, ^tag_reject)
) )
end end
defp restrict_tag_reject(query, %{tag_reject: tag_reject}) when is_binary(tag_reject) do defp restrict_embedded_tag_reject(query, %{tag_reject: tag_reject})
restrict_tag_reject(query, %{tag_reject: [tag_reject]}) when is_binary(tag_reject) do
restrict_embedded_tag_reject(query, %{tag_reject: [tag_reject]})
end end
defp restrict_tag_reject(query, _), do: query defp restrict_embedded_tag_reject(query, _), do: query
defp restrict_tag_all(_query, %{tag_all: _tag_all, skip_preload: true}) do defp restrict_embedded_tag_all(_query, %{tag_all: _tag_all, skip_preload: true}) do
raise_on_missing_preload() raise_on_missing_preload()
end end
defp restrict_tag_all(query, %{tag_all: tag_all}) when is_list(tag_all) do defp restrict_embedded_tag_all(query, %{tag_all: tag_all}) when is_list(tag_all) do
from( from(
[_activity, object] in query, [_activity, object] in query,
where: fragment("(?)->'tag' \\?& (?)", object.data, ^tag_all) where: fragment("(?)->'tag' \\?& (?)", object.data, ^tag_all)
) )
end end
defp restrict_tag_all(query, %{tag_all: tag}) when is_binary(tag) do defp restrict_embedded_tag_all(query, %{tag_all: tag}) when is_binary(tag) do
restrict_tag(query, %{tag: tag}) restrict_embedded_tag(query, %{tag: tag})
end end
defp restrict_tag_all(query, _), do: query defp restrict_embedded_tag_all(query, _), do: query
defp restrict_tag(_query, %{tag: _tag, skip_preload: true}) do defp restrict_embedded_tag(_query, %{tag: _tag, skip_preload: true}) do
raise_on_missing_preload() raise_on_missing_preload()
end end
defp restrict_tag(query, %{tag: tag}) when is_list(tag) do defp restrict_embedded_tag(query, %{tag: tag}) when is_list(tag) do
from( from(
[_activity, object] in query, [_activity, object] in query,
where: fragment("(?)->'tag' \\?| (?)", object.data, ^tag) where: fragment("(?)->'tag' \\?| (?)", object.data, ^tag)
) )
end end
defp restrict_tag(query, %{tag: tag}) when is_binary(tag) do defp restrict_embedded_tag(query, %{tag: tag}) when is_binary(tag) do
restrict_tag(query, %{tag: [tag]}) restrict_embedded_tag(query, %{tag: [tag]})
end end
defp restrict_tag(query, _), do: query defp restrict_embedded_tag(query, _), do: query
defp restrict_hashtag(query, opts) do defp hashtag_conditions(opts) do
[tag_any, tag_all, tag_reject] = [:tag, :tag_all, :tag_reject]
[:tag, :tag_all, :tag_reject] |> Enum.map(&opts[&1])
|> Enum.map(&opts[&1]) |> Enum.map(&List.wrap(&1))
|> Enum.map(&List.wrap(&1)) end
defp restrict_hashtag_agg(query, opts) do
[tag_any, tag_all, tag_reject] = hashtag_conditions(opts)
has_conditions = Enum.any?([tag_any, tag_all, tag_reject], &Enum.any?(&1)) has_conditions = Enum.any?([tag_any, tag_all, tag_reject], &Enum.any?(&1))
cond do cond do
@ -1275,15 +1278,19 @@ def fetch_activities_query(recipients, opts \\ %{}) do
|> exclude_invisible_actors(opts) |> exclude_invisible_actors(opts)
|> exclude_visibility(opts) |> exclude_visibility(opts)
cond do hashtag_timeline_strategy = Config.improved_hashtag_timeline()
Config.object_embedded_hashtags?() ->
query
|> restrict_tag(opts)
|> restrict_tag_reject(opts)
|> restrict_tag_all(opts)
# TODO: benchmark (initial approach preferring non-aggregate ops when possible) cond do
Config.improved_hashtag_timeline() == :join -> !hashtag_timeline_strategy ->
query
|> restrict_embedded_tag(opts)
|> restrict_embedded_tag_reject(opts)
|> restrict_embedded_tag_all(opts)
hashtag_timeline_strategy == :prefer_aggregation ->
restrict_hashtag_agg(query, opts)
hashtag_timeline_strategy == :avoid_aggregation or avoid_hashtags_aggregation?(opts) ->
query query
|> distinct([activity], true) |> distinct([activity], true)
|> restrict_hashtag_any(opts) |> restrict_hashtag_any(opts)
@ -1291,10 +1298,17 @@ def fetch_activities_query(recipients, opts \\ %{}) do
|> restrict_hashtag_reject_any(opts) |> restrict_hashtag_reject_any(opts)
true -> true ->
restrict_hashtag(query, opts) restrict_hashtag_agg(query, opts)
end end
end end
defp avoid_hashtags_aggregation?(opts) do
[tag_any, tag_all, tag_reject] = hashtag_conditions(opts)
joins_count = length(tag_all) + if Enum.any?(tag_any), do: 1, else: 0
Enum.empty?(tag_reject) and joins_count <= 2
end
def fetch_activities(recipients, opts \\ %{}, pagination \\ :keyset) do def fetch_activities(recipients, opts \\ %{}, pagination \\ :keyset) do
list_memberships = Pleroma.List.memberships(opts[:user]) list_memberships = Pleroma.List.memberships(opts[:user])

View file

@ -217,8 +217,8 @@ test "it fetches the appropriate tag-restricted posts" do
{:ok, status_two} = CommonAPI.post(user, %{status: ". #essais"}) {:ok, status_two} = CommonAPI.post(user, %{status: ". #essais"})
{:ok, status_three} = CommonAPI.post(user, %{status: ". #test #reject"}) {:ok, status_three} = CommonAPI.post(user, %{status: ". #test #reject"})
for new_timeline_enabled <- [true, false] do for hashtag_timeline_strategy <- [true, :prefer_aggregation, :avoid_aggregation, false] do
clear_config([:instance, :improved_hashtag_timeline], new_timeline_enabled) clear_config([:instance, :improved_hashtag_timeline], hashtag_timeline_strategy)
fetch_one = ActivityPub.fetch_activities([], %{type: "Create", tag: "test"}) fetch_one = ActivityPub.fetch_activities([], %{type: "Create", tag: "test"})