From 704a3830556d94e0dbc39873480e9ba95a143be9 Mon Sep 17 00:00:00 2001 From: Ivan Tashkinov Date: Wed, 8 Jul 2020 13:14:18 +0300 Subject: [PATCH 1/2] Improved search results for localized nickname match. Tweaked user search to rank nickname matches higher than name matches. --- lib/pleroma/user/search.ex | 8 +++++++- test/tasks/user_test.exs | 16 ++++++++-------- test/user_search_test.exs | 35 +++++++++++++++++++++++++++++------ 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/lib/pleroma/user/search.ex b/lib/pleroma/user/search.ex index 42ff1de78..7ff1c7e24 100644 --- a/lib/pleroma/user/search.ex +++ b/lib/pleroma/user/search.ex @@ -88,15 +88,21 @@ defp to_tsquery(query_string) do |> Enum.join(" | ") end + # Considers nickname match, localized nickname match, name match; prefers nickname match defp trigram_rank(query, query_string) do from( u in query, select_merge: %{ search_rank: fragment( - "similarity(?, trim(? || ' ' || coalesce(?, '')))", + "similarity(?, ?) + \ + similarity(?, regexp_replace(?, '@.+', '')) + \ + similarity(?, trim(coalesce(?, '')))", ^query_string, u.nickname, + ^query_string, + u.nickname, + ^query_string, u.name ) } diff --git a/test/tasks/user_test.exs b/test/tasks/user_test.exs index 9220d23fc..7bb49b038 100644 --- a/test/tasks/user_test.exs +++ b/test/tasks/user_test.exs @@ -464,17 +464,17 @@ test "it returns users matching" do moot = insert(:user, nickname: "moot") kawen = insert(:user, nickname: "kawen", name: "fediverse expert moon") - {:ok, user} = User.follow(user, kawen) + {:ok, user} = User.follow(user, moon) assert [moon.id, kawen.id] == User.Search.search("moon") |> Enum.map(& &1.id) - res = User.search("moo") |> Enum.map(& &1.id) - assert moon.id in res - assert moot.id in res - assert kawen.id in res - assert [moon.id, kawen.id] == User.Search.search("moon fediverse") |> Enum.map(& &1.id) - assert [kawen.id, moon.id] == - User.Search.search("moon fediverse", for_user: user) |> Enum.map(& &1.id) + res = 
User.search("moo") |> Enum.map(& &1.id) + assert Enum.sort([moon.id, moot.id, kawen.id]) == Enum.sort(res) + + assert [kawen.id, moon.id] == User.Search.search("expert fediverse") |> Enum.map(& &1.id) + + assert [moon.id, kawen.id] == + User.Search.search("expert fediverse", for_user: user) |> Enum.map(& &1.id) end end diff --git a/test/user_search_test.exs b/test/user_search_test.exs index f030523d3..758822072 100644 --- a/test/user_search_test.exs +++ b/test/user_search_test.exs @@ -46,30 +46,53 @@ test "accepts offset parameter" do assert length(User.search("john", limit: 3, offset: 3)) == 2 end - test "finds a user by full or partial nickname" do + defp clear_virtual_fields(user) do + Map.merge(user, %{search_rank: nil, search_type: nil}) + end + + test "finds a user by full nickname or its leading fragment" do user = insert(:user, %{nickname: "john"}) Enum.each(["john", "jo", "j"], fn query -> assert user == User.search(query) |> List.first() - |> Map.put(:search_rank, nil) - |> Map.put(:search_type, nil) + |> clear_virtual_fields() end) end - test "finds a user by full or partial name" do + test "finds a user by full name or leading fragment(s) of its words" do user = insert(:user, %{name: "John Doe"}) Enum.each(["John Doe", "JOHN", "doe", "j d", "j", "d"], fn query -> assert user == User.search(query) |> List.first() - |> Map.put(:search_rank, nil) - |> Map.put(:search_type, nil) + |> clear_virtual_fields() end) end + test "is not [yet] capable of matching by non-leading fragments (e.g. 
by domain)" do + user1 = insert(:user, %{nickname: "iamthedude"}) + insert(:user, %{nickname: "arandom@dude.com"}) + + assert [] == User.search("dude") + + # Matching by leading fragment works, though + user1_id = user1.id + assert ^user1_id = User.search("iam") |> List.first() |> Map.get(:id) + end + + test "ranks full nickname match higher than full name match" do + nicknamed_user = insert(:user, %{nickname: "hj@shigusegubu.club"}) + named_user = insert(:user, %{nickname: "xyz@sample.com", name: "HJ"}) + + results = User.search("hj") + + assert [nicknamed_user.id, named_user.id] == Enum.map(results, & &1.id) + assert Enum.at(results, 0).search_rank > Enum.at(results, 1).search_rank + end + test "finds users, considering density of matched tokens" do u1 = insert(:user, %{name: "Bar Bar plus Word Word"}) u2 = insert(:user, %{name: "Word Word Bar Bar Bar"}) From 123352ffa1c80aab658fca0c2276d1c06de43a02 Mon Sep 17 00:00:00 2001 From: Ivan Tashkinov Date: Wed, 8 Jul 2020 22:50:15 +0300 Subject: [PATCH 2/2] Removed unused trigram index on `users`. Fixed `users_fts_index` usage. --- lib/pleroma/user/search.ex | 16 +++++++++++----- .../20200708193702_drop_user_trigram_index.exs | 18 ++++++++++++++++++ test/user_search_test.exs | 12 ++++-------- 3 files changed, 33 insertions(+), 13 deletions(-) create mode 100644 priv/repo/migrations/20200708193702_drop_user_trigram_index.exs diff --git a/lib/pleroma/user/search.ex b/lib/pleroma/user/search.ex index 7ff1c7e24..d4fd31069 100644 --- a/lib/pleroma/user/search.ex +++ b/lib/pleroma/user/search.ex @@ -69,11 +69,15 @@ defp fts_search(query, query_string) do u in query, where: fragment( + # The fragment must _exactly_ match `users_fts_index`, otherwise the index won't work """ - (to_tsvector('simple', ?) || to_tsvector('simple', ?)) @@ to_tsquery('simple', ?) 
+ ( + setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') || + setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B') + ) @@ to_tsquery('simple', ?) """, - u.name, u.nickname, + u.name, ^query_string ) ) @@ -95,9 +99,11 @@ defp trigram_rank(query, query_string) do select_merge: %{ search_rank: fragment( - "similarity(?, ?) + \ - similarity(?, regexp_replace(?, '@.+', '')) + \ - similarity(?, trim(coalesce(?, '')))", + """ + similarity(?, ?) + + similarity(?, regexp_replace(?, '@.+', '')) + + similarity(?, trim(coalesce(?, ''))) + """, ^query_string, u.nickname, ^query_string, diff --git a/priv/repo/migrations/20200708193702_drop_user_trigram_index.exs b/priv/repo/migrations/20200708193702_drop_user_trigram_index.exs new file mode 100644 index 000000000..94efe323a --- /dev/null +++ b/priv/repo/migrations/20200708193702_drop_user_trigram_index.exs @@ -0,0 +1,18 @@ +defmodule Pleroma.Repo.Migrations.DropUserTrigramIndex do + @moduledoc "Drops unused trigram index on `users` (FTS index is being used instead)" + + use Ecto.Migration + + def up do + drop_if_exists(index(:users, [], name: :users_trigram_index)) + end + + def down do + create_if_not_exists( + index(:users, ["(trim(nickname || ' ' || coalesce(name, ''))) gist_trgm_ops"], + name: :users_trigram_index, + using: :gist + ) + ) + end +end diff --git a/test/user_search_test.exs b/test/user_search_test.exs index 758822072..559ba5966 100644 --- a/test/user_search_test.exs +++ b/test/user_search_test.exs @@ -72,15 +72,11 @@ test "finds a user by full name or leading fragment(s) of its words" do end) end - test "is not [yet] capable of matching by non-leading fragments (e.g. 
by domain)" do - user1 = insert(:user, %{nickname: "iamthedude"}) - insert(:user, %{nickname: "arandom@dude.com"}) + test "matches by leading fragment of user domain" do + user = insert(:user, %{nickname: "arandom@dude.com"}) + insert(:user, %{nickname: "iamthedude"}) - assert [] == User.search("dude") - - # Matching by leading fragment works, though - user1_id = user1.id - assert ^user1_id = User.search("iam") |> List.first() |> Map.get(:id) + assert [user.id] == User.search("dud") |> Enum.map(& &1.id) end test "ranks full nickname match higher than full name match" do