From 7f8785fd9be11fbb09283c2dbd32aeb7903a4f58 Mon Sep 17 00:00:00 2001
From: Ivan Tashkinov <ivantashkinov@gmail.com>
Date: Sun, 7 Mar 2021 11:33:21 +0300
Subject: [PATCH] [#3213] Performance optimization of filtering by hashtags
 ("any" condition).

---
 lib/pleroma/pagination.ex                     |  3 ++
 lib/pleroma/web/activity_pub/activity_pub.ex  | 47 ++++++++++++++-----
 .../controllers/timeline_controller.ex        | 39 ++++++---------
 3 files changed, 53 insertions(+), 36 deletions(-)

diff --git a/lib/pleroma/pagination.ex b/lib/pleroma/pagination.ex
index 0d24e1010..33e45a0eb 100644
--- a/lib/pleroma/pagination.ex
+++ b/lib/pleroma/pagination.ex
@@ -93,6 +93,7 @@ defp cast_params(params) do
       max_id: :string,
       offset: :integer,
       limit: :integer,
+      skip_extra_order: :boolean,
       skip_order: :boolean
     }
 
@@ -114,6 +115,8 @@ defp restrict(query, :max_id, %{max_id: max_id}, table_binding) do
 
   defp restrict(query, :order, %{skip_order: true}, _), do: query
 
+  defp restrict(%{order_bys: [_ | _]} = query, :order, %{skip_extra_order: true}, _), do: query
+
   defp restrict(query, :order, %{min_id: _}, table_binding) do
     order_by(
       query,
diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex
index a4b48ec9b..230faf024 100644
--- a/lib/pleroma/web/activity_pub/activity_pub.ex
+++ b/lib/pleroma/web/activity_pub/activity_pub.ex
@@ -466,6 +466,23 @@ def fetch_latest_direct_activity_id_for_context(context, opts \\ %{}) do
     |> Repo.one()
   end
 
+  defp fetch_paginated_optimized(query, opts, pagination) do
+    # Note: tag-filtering funcs may apply "ORDER BY objects.id DESC",
+    #   and extra sorting on "activities.id DESC NULLS LAST" would worse the query plan
+    opts = Map.put(opts, :skip_extra_order, true)
+
+    Pagination.fetch_paginated(query, opts, pagination)
+  end
+
+  def fetch_activities(recipients, opts \\ %{}, pagination \\ :keyset) do
+    list_memberships = Pleroma.List.memberships(opts[:user])
+
+    fetch_activities_query(recipients ++ list_memberships, opts)
+    |> fetch_paginated_optimized(opts, pagination)
+    |> Enum.reverse()
+    |> maybe_update_cc(list_memberships, opts[:user])
+  end
+
   @spec fetch_public_or_unlisted_activities(map(), Pagination.type()) :: [Activity.t()]
   def fetch_public_or_unlisted_activities(opts \\ %{}, pagination \\ :keyset) do
     opts = Map.delete(opts, :user)
@@ -473,7 +490,7 @@ def fetch_public_or_unlisted_activities(opts \\ %{}, pagination \\ :keyset) do
     [Constants.as_public()]
     |> fetch_activities_query(opts)
     |> restrict_unlisted(opts)
-    |> Pagination.fetch_paginated(opts, pagination)
+    |> fetch_paginated_optimized(opts, pagination)
   end
 
   @spec fetch_public_activities(map(), Pagination.type()) :: [Activity.t()]
@@ -751,6 +768,7 @@ defp object_ids_query_for_tags(tags) do
     |> join(:inner, [hto], ht in Pleroma.Hashtag, on: hto.hashtag_id == ht.id)
     |> where([hto, ht], ht.name in ^tags)
     |> select([hto], hto.object_id)
+    |> distinct([hto], true)
   end
 
   defp restrict_hashtag_all(_query, %{tag_all: _tag, skip_preload: true}) do
@@ -789,9 +807,18 @@ defp restrict_hashtag_any(_query, %{tag: _tag, skip_preload: true}) do
   end
 
   defp restrict_hashtag_any(query, %{tag: [_ | _] = tags}) do
+    hashtag_ids =
+      from(ht in Hashtag, where: ht.name in ^tags, select: ht.id)
+      |> Repo.all()
+
+    # Note: NO extra ordering should be done on "activities.id desc nulls last" for optimal plan
     from(
       [_activity, object] in query,
-      where: object.id in subquery(object_ids_query_for_tags(tags))
+      join: hto in "hashtags_objects",
+      on: hto.object_id == object.id,
+      where: hto.hashtag_id in ^hashtag_ids,
+      distinct: [desc: object.id],
+      order_by: [desc: object.id]
     )
   end
 
@@ -1188,7 +1215,12 @@ defp normalize_fetch_activities_query_opts(opts) do
           Map.put(opts, key, Hashtag.normalize_name(value))
 
         value when is_list(value) ->
-          Map.put(opts, key, Enum.map(value, &Hashtag.normalize_name/1))
+          normalized_value =
+            value
+            |> Enum.map(&Hashtag.normalize_name/1)
+            |> Enum.uniq()
+
+          Map.put(opts, key, normalized_value)
 
         _ ->
           opts
@@ -1275,15 +1307,6 @@ def fetch_activities_query(recipients, opts \\ %{}) do
     end
   end
 
-  def fetch_activities(recipients, opts \\ %{}, pagination \\ :keyset) do
-    list_memberships = Pleroma.List.memberships(opts[:user])
-
-    fetch_activities_query(recipients ++ list_memberships, opts)
-    |> Pagination.fetch_paginated(opts, pagination)
-    |> Enum.reverse()
-    |> maybe_update_cc(list_memberships, opts[:user])
-  end
-
   @doc """
   Fetch favorites activities of user with order by sort adds to favorites
   """
diff --git a/lib/pleroma/web/mastodon_api/controllers/timeline_controller.ex b/lib/pleroma/web/mastodon_api/controllers/timeline_controller.ex
index 87effa00b..c611958be 100644
--- a/lib/pleroma/web/mastodon_api/controllers/timeline_controller.ex
+++ b/lib/pleroma/web/mastodon_api/controllers/timeline_controller.ex
@@ -133,34 +133,25 @@ defp fail_on_bad_auth(conn) do
   end
 
   defp hashtag_fetching(params, user, local_only) do
-    tags =
+    # Note: not sanitizing tag options at this stage (may be mix-cased, have duplicates etc.)
+    tags_any =
       [params[:tag], params[:any]]
       |> List.flatten()
-      |> Enum.reject(&is_nil/1)
-      |> Enum.map(&String.downcase/1)
-      |> Enum.uniq()
+      |> Enum.filter(& &1)
 
-    tag_all =
-      params
-      |> Map.get(:all, [])
-      |> Enum.map(&String.downcase/1)
+    tag_all = Map.get(params, :all, [])
+    tag_reject = Map.get(params, :none, [])
 
-    tag_reject =
-      params
-      |> Map.get(:none, [])
-      |> Enum.map(&String.downcase/1)
-
-    _activities =
-      params
-      |> Map.put(:type, "Create")
-      |> Map.put(:local_only, local_only)
-      |> Map.put(:blocking_user, user)
-      |> Map.put(:muting_user, user)
-      |> Map.put(:user, user)
-      |> Map.put(:tag, tags)
-      |> Map.put(:tag_all, tag_all)
-      |> Map.put(:tag_reject, tag_reject)
-      |> ActivityPub.fetch_public_activities()
+    params
+    |> Map.put(:type, "Create")
+    |> Map.put(:local_only, local_only)
+    |> Map.put(:blocking_user, user)
+    |> Map.put(:muting_user, user)
+    |> Map.put(:user, user)
+    |> Map.put(:tag, tags_any)
+    |> Map.put(:tag_all, tag_all)
+    |> Map.put(:tag_reject, tag_reject)
+    |> ActivityPub.fetch_public_activities()
   end
 
   # GET /api/v1/timelines/tag/:tag