[#3213] ActivityPub: implemented subqueries-based hashtags filtering, removed aggregation-based hashtags filtering.
This commit is contained in:
parent
9948ff3356
commit
6fd4163ab6
3 changed files with 81 additions and 154 deletions
|
@ -940,7 +940,7 @@
|
||||||
key: :improved_hashtag_timeline,
|
key: :improved_hashtag_timeline,
|
||||||
type: :keyword,
|
type: :keyword,
|
||||||
description:
|
description:
|
||||||
"If `true` / `:prefer_aggregation`, hashtags table and selected strategy will be used for hashtags timeline. When `false`, object-embedded hashtags will be used (slower). Is auto-set to `true` (unless overridden) when HashtagsTableMigrator completes."
|
"If `true`, hashtags will be fetched from `hashtags` table for hashtags timeline. When `false`, object-embedded hashtags will be used (slower). Is auto-set to `true` (unless overridden) when HashtagsTableMigrator completes."
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
|
@ -669,24 +669,6 @@ defp restrict_since(query, %{since_id: since_id}) do
|
||||||
|
|
||||||
defp restrict_since(query, _), do: query
|
defp restrict_since(query, _), do: query
|
||||||
|
|
||||||
defp restrict_embedded_tag_reject(_query, %{tag_reject: _tag_reject, skip_preload: true}) do
|
|
||||||
raise_on_missing_preload()
|
|
||||||
end
|
|
||||||
|
|
||||||
defp restrict_embedded_tag_reject(query, %{tag_reject: tag_reject}) when is_list(tag_reject) do
|
|
||||||
from(
|
|
||||||
[_activity, object] in query,
|
|
||||||
where: fragment("not (?)->'tag' \\?| (?)", object.data, ^tag_reject)
|
|
||||||
)
|
|
||||||
end
|
|
||||||
|
|
||||||
defp restrict_embedded_tag_reject(query, %{tag_reject: tag_reject})
|
|
||||||
when is_binary(tag_reject) do
|
|
||||||
restrict_embedded_tag_reject(query, %{tag_reject: [tag_reject]})
|
|
||||||
end
|
|
||||||
|
|
||||||
defp restrict_embedded_tag_reject(query, _), do: query
|
|
||||||
|
|
||||||
defp restrict_embedded_tag_all(_query, %{tag_all: _tag_all, skip_preload: true}) do
|
defp restrict_embedded_tag_all(_query, %{tag_all: _tag_all, skip_preload: true}) do
|
||||||
raise_on_missing_preload()
|
raise_on_missing_preload()
|
||||||
end
|
end
|
||||||
|
@ -699,139 +681,65 @@ defp restrict_embedded_tag_all(query, %{tag_all: tag_all}) when is_list(tag_all)
|
||||||
end
|
end
|
||||||
|
|
||||||
defp restrict_embedded_tag_all(query, %{tag_all: tag}) when is_binary(tag) do
|
defp restrict_embedded_tag_all(query, %{tag_all: tag}) when is_binary(tag) do
|
||||||
restrict_embedded_tag(query, %{tag: tag})
|
restrict_embedded_tag_any(query, %{tag: tag})
|
||||||
end
|
end
|
||||||
|
|
||||||
defp restrict_embedded_tag_all(query, _), do: query
|
defp restrict_embedded_tag_all(query, _), do: query
|
||||||
|
|
||||||
defp restrict_embedded_tag(_query, %{tag: _tag, skip_preload: true}) do
|
defp restrict_embedded_tag_any(_query, %{tag: _tag, skip_preload: true}) do
|
||||||
raise_on_missing_preload()
|
raise_on_missing_preload()
|
||||||
end
|
end
|
||||||
|
|
||||||
defp restrict_embedded_tag(query, %{tag: tag}) when is_list(tag) do
|
defp restrict_embedded_tag_any(query, %{tag: tag}) when is_list(tag) do
|
||||||
from(
|
from(
|
||||||
[_activity, object] in query,
|
[_activity, object] in query,
|
||||||
where: fragment("(?)->'tag' \\?| (?)", object.data, ^tag)
|
where: fragment("(?)->'tag' \\?| (?)", object.data, ^tag)
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
defp restrict_embedded_tag(query, %{tag: tag}) when is_binary(tag) do
|
defp restrict_embedded_tag_any(query, %{tag: tag}) when is_binary(tag) do
|
||||||
restrict_embedded_tag(query, %{tag: [tag]})
|
restrict_embedded_tag_any(query, %{tag: [tag]})
|
||||||
end
|
end
|
||||||
|
|
||||||
defp restrict_embedded_tag(query, _), do: query
|
defp restrict_embedded_tag_any(query, _), do: query
|
||||||
|
|
||||||
defp hashtag_conditions(opts) do
|
defp restrict_embedded_tag_reject_any(_query, %{tag_reject: _tag_reject, skip_preload: true}) do
|
||||||
[:tag, :tag_all, :tag_reject]
|
|
||||||
|> Enum.map(&opts[&1])
|
|
||||||
|> Enum.map(&List.wrap(&1))
|
|
||||||
end
|
|
||||||
|
|
||||||
# Note: times out on larger instances (with default timeout), intended for complex queries
|
|
||||||
defp restrict_hashtag_agg(query, opts) do
|
|
||||||
[tag_any, tag_all, tag_reject] = hashtag_conditions(opts)
|
|
||||||
has_conditions = Enum.any?([tag_any, tag_all, tag_reject], &Enum.any?(&1))
|
|
||||||
|
|
||||||
cond do
|
|
||||||
!has_conditions ->
|
|
||||||
query
|
|
||||||
|
|
||||||
opts[:skip_preload] ->
|
|
||||||
raise_on_missing_preload()
|
|
||||||
|
|
||||||
true ->
|
|
||||||
query
|
|
||||||
|> group_by_all_bindings()
|
|
||||||
|> join(:left, [_activity, object], hashtag in assoc(object, :hashtags), as: :hashtag)
|
|
||||||
|> maybe_restrict_hashtag_any(tag_any)
|
|
||||||
|> maybe_restrict_hashtag_all(tag_all)
|
|
||||||
|> maybe_restrict_hashtag_reject_any(tag_reject)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# Groups by all bindings to allow aggregation on hashtags
|
|
||||||
defp group_by_all_bindings(query) do
|
|
||||||
# Expecting named bindings: :object, :bookmark, :thread_mute, :report_note
|
|
||||||
cond do
|
|
||||||
Enum.count(query.aliases) == 4 ->
|
|
||||||
from([a, o, b3, b4, b5] in query, group_by: [a.id, o.id, b3.id, b4.id, b5.id])
|
|
||||||
|
|
||||||
Enum.count(query.aliases) == 3 ->
|
|
||||||
from([a, o, b3, b4] in query, group_by: [a.id, o.id, b3.id, b4.id])
|
|
||||||
|
|
||||||
Enum.count(query.aliases) == 2 ->
|
|
||||||
from([a, o, b3] in query, group_by: [a.id, o.id, b3.id])
|
|
||||||
|
|
||||||
true ->
|
|
||||||
from([a, o] in query, group_by: [a.id, o.id])
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
defp maybe_restrict_hashtag_any(query, []) do
|
|
||||||
query
|
|
||||||
end
|
|
||||||
|
|
||||||
defp maybe_restrict_hashtag_any(query, tags) do
|
|
||||||
having(
|
|
||||||
query,
|
|
||||||
[hashtag: hashtag],
|
|
||||||
fragment("array_agg(?) && (?)", hashtag.name, ^tags)
|
|
||||||
)
|
|
||||||
end
|
|
||||||
|
|
||||||
defp maybe_restrict_hashtag_all(query, []) do
|
|
||||||
query
|
|
||||||
end
|
|
||||||
|
|
||||||
defp maybe_restrict_hashtag_all(query, tags) do
|
|
||||||
having(
|
|
||||||
query,
|
|
||||||
[hashtag: hashtag],
|
|
||||||
fragment("array_agg(?) @> (?)", hashtag.name, ^tags)
|
|
||||||
)
|
|
||||||
end
|
|
||||||
|
|
||||||
defp maybe_restrict_hashtag_reject_any(query, []) do
|
|
||||||
query
|
|
||||||
end
|
|
||||||
|
|
||||||
defp maybe_restrict_hashtag_reject_any(query, tags) do
|
|
||||||
having(
|
|
||||||
query,
|
|
||||||
[hashtag: hashtag],
|
|
||||||
fragment("not(array_agg(?) && (?))", hashtag.name, ^tags)
|
|
||||||
)
|
|
||||||
end
|
|
||||||
|
|
||||||
defp restrict_hashtag_reject_any(_query, %{tag_reject: _tag_reject, skip_preload: true}) do
|
|
||||||
raise_on_missing_preload()
|
raise_on_missing_preload()
|
||||||
end
|
end
|
||||||
|
|
||||||
defp restrict_hashtag_reject_any(query, %{tag_reject: tags_reject}) when is_list(tags_reject) do
|
defp restrict_embedded_tag_reject_any(query, %{tag_reject: tag_reject})
|
||||||
query
|
when is_list(tag_reject) do
|
||||||
|> group_by_all_bindings()
|
from(
|
||||||
|> join(:left, [_activity, object], hashtag in assoc(object, :hashtags), as: :hashtag)
|
[_activity, object] in query,
|
||||||
|> having(
|
where: fragment("not (?)->'tag' \\?| (?)", object.data, ^tag_reject)
|
||||||
[hashtag: hashtag],
|
|
||||||
fragment("not(array_agg(?) && (?))", hashtag.name, ^tags_reject)
|
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
defp restrict_hashtag_reject_any(query, %{tag_reject: tag_reject}) when is_binary(tag_reject) do
|
defp restrict_embedded_tag_reject_any(query, %{tag_reject: tag_reject})
|
||||||
restrict_hashtag_reject_any(query, %{tag_reject: [tag_reject]})
|
when is_binary(tag_reject) do
|
||||||
|
restrict_embedded_tag_reject_any(query, %{tag_reject: [tag_reject]})
|
||||||
end
|
end
|
||||||
|
|
||||||
defp restrict_hashtag_reject_any(query, _), do: query
|
defp restrict_embedded_tag_reject_any(query, _), do: query
|
||||||
|
|
||||||
defp restrict_hashtag_all(_query, %{tag_all: _tag, skip_preload: true}) do
|
defp restrict_hashtag_all(_query, %{tag_all: _tag, skip_preload: true}) do
|
||||||
raise_on_missing_preload()
|
raise_on_missing_preload()
|
||||||
end
|
end
|
||||||
|
|
||||||
defp restrict_hashtag_all(query, %{tag_all: tags}) when is_list(tags) do
|
defp restrict_hashtag_all(query, %{tag_all: tags}) when is_list(tags) do
|
||||||
Enum.reduce(
|
from(
|
||||||
tags,
|
[_activity, object] in query,
|
||||||
query,
|
where:
|
||||||
fn tag, acc -> restrict_hashtag_any(acc, %{tag: tag}) end
|
fragment(
|
||||||
|
"""
|
||||||
|
(SELECT array_agg(hashtags.name) FROM hashtags JOIN hashtags_objects
|
||||||
|
ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?)
|
||||||
|
AND hashtags_objects.object_id = ?) @> ?
|
||||||
|
""",
|
||||||
|
^tags,
|
||||||
|
object.id,
|
||||||
|
^tags
|
||||||
|
)
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -846,18 +754,19 @@ defp restrict_hashtag_any(_query, %{tag: _tag, skip_preload: true}) do
|
||||||
end
|
end
|
||||||
|
|
||||||
defp restrict_hashtag_any(query, %{tag: tags}) when is_list(tags) do
|
defp restrict_hashtag_any(query, %{tag: tags}) when is_list(tags) do
|
||||||
query =
|
from(
|
||||||
from(
|
[_activity, object] in query,
|
||||||
[_activity, object] in query,
|
where:
|
||||||
join: hashtag in assoc(object, :hashtags),
|
fragment(
|
||||||
where: hashtag.name in ^tags
|
"""
|
||||||
)
|
EXISTS (SELECT 1 FROM hashtags JOIN hashtags_objects
|
||||||
|
ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?)
|
||||||
if length(tags) > 1 do
|
AND hashtags_objects.object_id = ? LIMIT 1)
|
||||||
distinct(query, [activity], true)
|
""",
|
||||||
else
|
^tags,
|
||||||
query
|
object.id
|
||||||
end
|
)
|
||||||
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
defp restrict_hashtag_any(query, %{tag: tag}) when is_binary(tag) do
|
defp restrict_hashtag_any(query, %{tag: tag}) when is_binary(tag) do
|
||||||
|
@ -866,6 +775,32 @@ defp restrict_hashtag_any(query, %{tag: tag}) when is_binary(tag) do
|
||||||
|
|
||||||
defp restrict_hashtag_any(query, _), do: query
|
defp restrict_hashtag_any(query, _), do: query
|
||||||
|
|
||||||
|
defp restrict_hashtag_reject_any(_query, %{tag_reject: _tag_reject, skip_preload: true}) do
|
||||||
|
raise_on_missing_preload()
|
||||||
|
end
|
||||||
|
|
||||||
|
defp restrict_hashtag_reject_any(query, %{tag_reject: tags_reject}) when is_list(tags_reject) do
|
||||||
|
from(
|
||||||
|
[_activity, object] in query,
|
||||||
|
where:
|
||||||
|
fragment(
|
||||||
|
"""
|
||||||
|
NOT EXISTS (SELECT 1 FROM hashtags JOIN hashtags_objects
|
||||||
|
ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?)
|
||||||
|
AND hashtags_objects.object_id = ? LIMIT 1)
|
||||||
|
""",
|
||||||
|
^tags_reject,
|
||||||
|
object.id
|
||||||
|
)
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
defp restrict_hashtag_reject_any(query, %{tag_reject: tag_reject}) when is_binary(tag_reject) do
|
||||||
|
restrict_hashtag_reject_any(query, %{tag_reject: [tag_reject]})
|
||||||
|
end
|
||||||
|
|
||||||
|
defp restrict_hashtag_reject_any(query, _), do: query
|
||||||
|
|
||||||
defp raise_on_missing_preload do
|
defp raise_on_missing_preload do
|
||||||
raise "Can't use the child object without preloading!"
|
raise "Can't use the child object without preloading!"
|
||||||
end
|
end
|
||||||
|
@ -1286,23 +1221,16 @@ def fetch_activities_query(recipients, opts \\ %{}) do
|
||||||
|> exclude_invisible_actors(opts)
|
|> exclude_invisible_actors(opts)
|
||||||
|> exclude_visibility(opts)
|
|> exclude_visibility(opts)
|
||||||
|
|
||||||
hashtag_timeline_strategy = Config.improved_hashtag_timeline()
|
if Config.improved_hashtag_timeline() do
|
||||||
|
query
|
||||||
cond do
|
|> restrict_hashtag_any(opts)
|
||||||
!hashtag_timeline_strategy ->
|
|> restrict_hashtag_all(opts)
|
||||||
query
|
|> restrict_hashtag_reject_any(opts)
|
||||||
|> restrict_embedded_tag(opts)
|
else
|
||||||
|> restrict_embedded_tag_reject(opts)
|
query
|
||||||
|> restrict_embedded_tag_all(opts)
|
|> restrict_embedded_tag_any(opts)
|
||||||
|
|> restrict_embedded_tag_all(opts)
|
||||||
hashtag_timeline_strategy == :prefer_aggregation ->
|
|> restrict_embedded_tag_reject_any(opts)
|
||||||
restrict_hashtag_agg(query, opts)
|
|
||||||
|
|
||||||
true ->
|
|
||||||
query
|
|
||||||
|> restrict_hashtag_any(opts)
|
|
||||||
|> restrict_hashtag_all(opts)
|
|
||||||
|> restrict_hashtag_reject_any(opts)
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -220,7 +220,7 @@ test "it fetches the appropriate tag-restricted posts" do
|
||||||
{:ok, status_four} = CommonAPI.post(user, %{status: ". #any1 #any2"})
|
{:ok, status_four} = CommonAPI.post(user, %{status: ". #any1 #any2"})
|
||||||
{:ok, status_five} = CommonAPI.post(user, %{status: ". #any2 #any1"})
|
{:ok, status_five} = CommonAPI.post(user, %{status: ". #any2 #any1"})
|
||||||
|
|
||||||
for hashtag_timeline_strategy <- [true, :prefer_aggregation, false] do
|
for hashtag_timeline_strategy <- [true, false] do
|
||||||
clear_config([:instance, :improved_hashtag_timeline], hashtag_timeline_strategy)
|
clear_config([:instance, :improved_hashtag_timeline], hashtag_timeline_strategy)
|
||||||
|
|
||||||
fetch_one = ActivityPub.fetch_activities([], %{type: "Create", tag: "test"})
|
fetch_one = ActivityPub.fetch_activities([], %{type: "Create", tag: "test"})
|
||||||
|
@ -241,8 +241,7 @@ test "it fetches the appropriate tag-restricted posts" do
|
||||||
tag_all: ["test", "reject"]
|
tag_all: ["test", "reject"]
|
||||||
})
|
})
|
||||||
|
|
||||||
# This test would fail if JOIN with 2+ terms in "any" clause is done without DISTINCT.
|
# Testing that deduplication (if needed) is done on DB (not Ecto) level; :limit is important
|
||||||
# The :limit is important (w/o DISTINCT 2 records are deduped by Ecto to 1 b/c of preload).
|
|
||||||
fetch_five =
|
fetch_five =
|
||||||
ActivityPub.fetch_activities([], %{
|
ActivityPub.fetch_activities([], %{
|
||||||
type: "Create",
|
type: "Create",
|
||||||
|
|
Loading…
Reference in a new issue