Alternative implementation of hashtag-filtering queries in ActivityPub. Fixed GROUP BY clause for aggregation on hashtags.

This commit is contained in:
Ivan Tashkinov 2020-12-31 12:45:23 +03:00
parent 367f0c31c3
commit 303055456f
2 changed files with 100 additions and 22 deletions

View file

@ -113,6 +113,7 @@ def with_preloaded_bookmark(query, %User{} = user) do
from([a] in query, from([a] in query,
left_join: b in Bookmark, left_join: b in Bookmark,
on: b.user_id == ^user.id and b.activity_id == a.id, on: b.user_id == ^user.id and b.activity_id == a.id,
as: :bookmark,
preload: [bookmark: b] preload: [bookmark: b]
) )
end end
@ -123,6 +124,7 @@ def with_preloaded_report_notes(query) do
from([a] in query, from([a] in query,
left_join: r in ReportNote, left_join: r in ReportNote,
on: a.id == r.activity_id, on: a.id == r.activity_id,
as: :report_note,
preload: [report_notes: r] preload: [report_notes: r]
) )
end end

View file

@ -713,22 +713,92 @@ defp restrict_tag(query, %{tag: tag}) when is_binary(tag) do
defp restrict_tag(query, _), do: query defp restrict_tag(query, _), do: query
# Applies the hashtag filters found in `opts` to `query` in a single pass:
#   * `:tag`        - match activities having any of the given hashtags
#   * `:tag_all`    - match activities having all of the given hashtags
#   * `:tag_reject` - drop activities having any of the given hashtags
# Each option may be a single value or a list. When none of them carries a
# truthy value the query is returned untouched. When filters are present but
# `opts[:skip_preload]` is set, `raise_on_missing_preload/0` is called instead,
# since the hashtag join below is built on the second (object) binding.
defp restrict_hashtag(query, opts) do
  any_of = List.wrap(opts[:tag])
  all_of = List.wrap(opts[:tag_all])
  none_of = List.wrap(opts[:tag_reject])

  filters_given? =
    Enum.any?([any_of, all_of, none_of], fn tags -> Enum.any?(tags) end)

  if filters_given? do
    if opts[:skip_preload] do
      raise_on_missing_preload()
    else
      # Grouping has to be set up before the hashtag join so that the
      # HAVING clauses added below can aggregate over the joined hashtags.
      grouped = group_by_all_bindings(query)

      grouped
      |> join(:left, [_activity, object], hashtag in assoc(object, :hashtags), as: :hashtag)
      |> maybe_restrict_hashtag_any(any_of)
      |> maybe_restrict_hashtag_all(all_of)
      |> maybe_restrict_hashtag_reject_any(none_of)
    end
  else
    query
  end
end
# Adds a GROUP BY on the `id` of every positional binding of `query`, so that
# rows joined afterwards (hashtags) can be aggregated per activity.
#
# The number of positional bindings is derived from the number of named
# bindings in `query.aliases`. Expected named bindings are :object, :bookmark,
# :thread_mute and :report_note (presumably every join after the root activity
# binding is named - verify against the callers).
# NOTE(review): any alias count other than 2, 3 or 4 falls through to grouping
# by the first two bindings only.
defp group_by_all_bindings(query) do
  case Enum.count(query.aliases) do
    4 ->
      from([activity, object, j3, j4, j5] in query,
        group_by: [activity.id, object.id, j3.id, j4.id, j5.id]
      )

    3 ->
      from([activity, object, j3, j4] in query,
        group_by: [activity.id, object.id, j3.id, j4.id]
      )

    2 ->
      from([activity, object, j3] in query,
        group_by: [activity.id, object.id, j3.id]
      )

    _other ->
      from([activity, object] in query,
        group_by: [activity.id, object.id]
      )
  end
end
# Keeps only the groups whose aggregated hashtag names overlap with `tags`
# (`&&` in the fragment). An empty tag list leaves the query unchanged.
# Relies on the query already carrying a binding named :hashtag.
defp maybe_restrict_hashtag_any(query, []), do: query

defp maybe_restrict_hashtag_any(query, tags) do
  from([hashtag: tag_row] in query,
    having: fragment("array_agg(?) && (?)", tag_row.name, ^tags)
  )
end
# Keeps only the groups whose aggregated hashtag names contain every entry of
# `tags` (`@>` in the fragment). An empty tag list leaves the query unchanged.
# Relies on the query already carrying a binding named :hashtag.
defp maybe_restrict_hashtag_all(query, []), do: query

defp maybe_restrict_hashtag_all(query, tags) do
  from([hashtag: tag_row] in query,
    having: fragment("array_agg(?) @> (?)", tag_row.name, ^tags)
  )
end
# Drops the groups whose aggregated hashtag names overlap with `tags`
# (negated `&&` in the fragment). An empty tag list leaves the query unchanged.
# Relies on the query already carrying a binding named :hashtag.
defp maybe_restrict_hashtag_reject_any(query, []), do: query

defp maybe_restrict_hashtag_reject_any(query, tags) do
  from([hashtag: tag_row] in query,
    having: fragment("not(array_agg(?) && (?))", tag_row.name, ^tags)
  )
end
defp restrict_hashtag_reject_any(_query, %{tag_reject: _tag_reject, skip_preload: true}) do defp restrict_hashtag_reject_any(_query, %{tag_reject: _tag_reject, skip_preload: true}) do
raise_on_missing_preload() raise_on_missing_preload()
end end
defp restrict_hashtag_reject_any(query, %{tag_reject: tags_reject}) when is_list(tags_reject) do defp restrict_hashtag_reject_any(query, %{tag_reject: tags_reject}) when is_list(tags_reject) do
if has_named_binding?(query, :thread_mute) do query
from( |> group_by_all_bindings()
[activity, object, thread_mute] in query,
group_by: [activity.id, object.id, thread_mute.id]
)
else
from(
[activity, object] in query,
group_by: [activity.id, object.id]
)
end
|> join(:left, [_activity, object], hashtag in assoc(object, :hashtags), as: :hashtag) |> join(:left, [_activity, object], hashtag in assoc(object, :hashtags), as: :hashtag)
|> having( |> having(
[hashtag: hashtag], [hashtag: hashtag],
@ -1167,7 +1237,6 @@ def fetch_activities_query(recipients, opts \\ %{}) do
query = query =
Activity Activity
|> distinct([a], true)
|> maybe_preload_objects(opts) |> maybe_preload_objects(opts)
|> maybe_preload_bookmarks(opts) |> maybe_preload_bookmarks(opts)
|> maybe_preload_report_notes(opts) |> maybe_preload_report_notes(opts)
@ -1199,16 +1268,23 @@ def fetch_activities_query(recipients, opts \\ %{}) do
|> exclude_invisible_actors(opts) |> exclude_invisible_actors(opts)
|> exclude_visibility(opts) |> exclude_visibility(opts)
if Config.object_embedded_hashtags?() do cond do
query Config.object_embedded_hashtags?() ->
|> restrict_tag(opts) query
|> restrict_tag_reject(opts) |> restrict_tag(opts)
|> restrict_tag_all(opts) |> restrict_tag_reject(opts)
else |> restrict_tag_all(opts)
query
|> restrict_hashtag_any(opts) # TODO: benchmark (initial approach preferring non-aggregate ops when possible)
|> restrict_hashtag_all(opts) Config.get([:instance, :improved_hashtag_timeline]) == :join ->
|> restrict_hashtag_reject_any(opts) query
|> distinct([activity], true)
|> restrict_hashtag_any(opts)
|> restrict_hashtag_all(opts)
|> restrict_hashtag_reject_any(opts)
true ->
restrict_hashtag(query, opts)
end end
end end