From 1dac7d14623f36744953a523650211540d90d1fc Mon Sep 17 00:00:00 2001 From: Ivan Tashkinov Date: Mon, 15 Feb 2021 21:13:14 +0300 Subject: [PATCH] [#3213] Fixed `hashtags.name` lookup (must use `citext` type to do index scan). Fixed embedded hashtags lookup (lowercasing), adjusted tests. --- lib/pleroma/hashtag.ex | 8 +++++-- lib/pleroma/web/activity_pub/activity_pub.ex | 22 ++++++++++++++----- .../web/activity_pub/activity_pub_test.exs | 18 +++++++-------- 3 files changed, 31 insertions(+), 17 deletions(-) diff --git a/lib/pleroma/hashtag.ex b/lib/pleroma/hashtag.ex index 0d6a4d09e..a6d033816 100644 --- a/lib/pleroma/hashtag.ex +++ b/lib/pleroma/hashtag.ex @@ -22,7 +22,9 @@ defmodule Pleroma.Hashtag do end def get_by_name(name) do - Repo.get_by(Hashtag, name: name) + from(h in Hashtag) + |> where([h], fragment("name = ?::citext", ^String.downcase(name))) + |> Repo.one() end def get_or_create_by_name(name) when is_bitstring(name) do @@ -37,6 +39,7 @@ def get_or_create_by_name(name) when is_bitstring(name) do end def get_or_create_by_names(names) when is_list(names) do + names = Enum.map(names, &String.downcase/1) timestamp = NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second) structs = @@ -52,7 +55,8 @@ def get_or_create_by_names(names) when is_list(names) do Multi.new() |> Multi.insert_all(:insert_all_op, Hashtag, structs, on_conflict: :nothing) |> Multi.run(:query_op, fn _repo, _changes -> - {:ok, Repo.all(from(ht in Hashtag, where: ht.name in ^names))} + {:ok, + Repo.all(from(ht in Hashtag, where: ht.name in fragment("?::citext[]", ^names)))} end) |> Repo.transaction() do {:ok, hashtags} diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex index 9623e635a..e012f2779 100644 --- a/lib/pleroma/web/activity_pub/activity_pub.ex +++ b/lib/pleroma/web/activity_pub/activity_pub.ex @@ -698,6 +698,8 @@ defp restrict_embedded_tag_all(_query, %{tag_all: _tag_all, skip_preload: true}) end defp restrict_embedded_tag_all(query, %{tag_all: [_ | _] = tag_all}) do + tag_all = Enum.map(tag_all, &String.downcase/1) + from( [_activity, object] in query, where: fragment("(?)->'tag' \\?& (?)", object.data, ^tag_all) @@ -714,10 +716,12 @@ defp restrict_embedded_tag_any(_query, %{tag: _tag, skip_preload: true}) do raise_on_missing_preload() end - defp restrict_embedded_tag_any(query, %{tag: [_ | _] = tag}) do + defp restrict_embedded_tag_any(query, %{tag: [_ | _] = tag_any}) do + tag_any = Enum.map(tag_any, &String.downcase/1) + from( [_activity, object] in query, - where: fragment("(?)->'tag' \\?| (?)", object.data, ^tag) + where: fragment("(?)->'tag' \\?| (?)", object.data, ^tag_any) ) end @@ -732,6 +736,8 @@ defp restrict_embedded_tag_reject_any(_query, %{tag_reject: _tag_reject, skip_pr end defp restrict_embedded_tag_reject_any(query, %{tag_reject: [_ | _] = tag_reject}) do + tag_reject = Enum.map(tag_reject, &String.downcase/1) + from( [_activity, object] in query, where: fragment("not (?)->'tag' \\?| (?)", object.data, ^tag_reject) @@ -749,6 +755,10 @@ defp restrict_hashtag_all(_query, %{tag_all: _tag, skip_preload: true}) do raise_on_missing_preload() end + defp restrict_hashtag_all(query, %{tag_all: [single_tag]}) do + restrict_hashtag_any(query, %{tag: single_tag}) + end + defp restrict_hashtag_all(query, %{tag_all: [_ | _] = tags}) do from( [_activity, object] in query, @@ -756,7 +766,7 @@ defp restrict_hashtag_all(query, %{tag_all: [_ | _] = tags}) do fragment( """ (SELECT array_agg(hashtags.name) FROM hashtags JOIN hashtags_objects - ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?) + ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?::citext[]) AND hashtags_objects.object_id = ?) @> ? """, ^tags, @@ -767,7 +777,7 @@ defp restrict_hashtag_all(query, %{tag_all: [_ | _] = tags}) do end defp restrict_hashtag_all(query, %{tag_all: tag}) when is_binary(tag) do - restrict_hashtag_any(query, %{tag: tag}) + restrict_hashtag_all(query, %{tag_all: [tag]}) end defp restrict_hashtag_all(query, _), do: query @@ -783,7 +793,7 @@ defp restrict_hashtag_any(query, %{tag: [_ | _] = tags}) do fragment( """ EXISTS (SELECT 1 FROM hashtags JOIN hashtags_objects - ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?) + ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?::citext[]) AND hashtags_objects.object_id = ? LIMIT 1) """, ^tags, @@ -809,7 +819,7 @@ defp restrict_hashtag_reject_any(query, %{tag_reject: [_ | _] = tags_reject}) do fragment( """ NOT EXISTS (SELECT 1 FROM hashtags JOIN hashtags_objects - ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?) + ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?::citext[]) AND hashtags_objects.object_id = ? LIMIT 1) """, ^tags_reject, diff --git a/test/pleroma/web/activity_pub/activity_pub_test.exs b/test/pleroma/web/activity_pub/activity_pub_test.exs index bab5a199c..c41c8a5dd 100644 --- a/test/pleroma/web/activity_pub/activity_pub_test.exs +++ b/test/pleroma/web/activity_pub/activity_pub_test.exs @@ -213,24 +213,24 @@ test "works for guppe actors" do test "it fetches the appropriate tag-restricted posts" do user = insert(:user) - {:ok, status_one} = CommonAPI.post(user, %{status: ". #test"}) + {:ok, status_one} = CommonAPI.post(user, %{status: ". #TEST"}) {:ok, status_two} = CommonAPI.post(user, %{status: ". #essais"}) - {:ok, status_three} = CommonAPI.post(user, %{status: ". #test #reject"}) + {:ok, status_three} = CommonAPI.post(user, %{status: ". #test #Reject"}) - {:ok, status_four} = CommonAPI.post(user, %{status: ". #any1 #any2"}) - {:ok, status_five} = CommonAPI.post(user, %{status: ". #any2 #any1"}) + {:ok, status_four} = CommonAPI.post(user, %{status: ". #Any1 #any2"}) + {:ok, status_five} = CommonAPI.post(user, %{status: ". #Any2 #any1"}) for hashtag_timeline_strategy <- [true, false] do clear_config([:database, :improved_hashtag_timeline], hashtag_timeline_strategy) fetch_one = ActivityPub.fetch_activities([], %{type: "Create", tag: "test"}) - fetch_two = ActivityPub.fetch_activities([], %{type: "Create", tag: ["test", "essais"]}) + fetch_two = ActivityPub.fetch_activities([], %{type: "Create", tag: ["TEST", "essais"]}) fetch_three = ActivityPub.fetch_activities([], %{ type: "Create", - tag: ["test", "essais"], + tag: ["test", "Essais"], tag_reject: ["reject"] }) @@ -238,21 +238,21 @@ test "it fetches the appropriate tag-restricted posts" do ActivityPub.fetch_activities([], %{ type: "Create", tag: ["test"], - tag_all: ["test", "reject"] + tag_all: ["test", "REJECT"] }) # Testing that deduplication (if needed) is done on DB (not Ecto) level; :limit is important fetch_five = ActivityPub.fetch_activities([], %{ type: "Create", - tag: ["any1", "any2"], + tag: ["ANY1", "any2"], limit: 2 }) fetch_six = ActivityPub.fetch_activities([], %{ type: "Create", - tag: ["any1", "any2"], + tag: ["any1", "Any2"], tag_all: [], tag_reject: [] })