Merge branch 'i1t/pleroma-477_user_search_improvements' into 'develop'

I1t/pleroma 477 user search improvements

See merge request pleroma/pleroma!685
This commit is contained in:
lambda 2019-01-20 10:24:05 +00:00
commit f3045a179e
7 changed files with 230 additions and 41 deletions

View file

@ -35,7 +35,7 @@ defmodule Pleroma.User do
field(:avatar, :map)
field(:local, :boolean, default: true)
field(:follower_address, :string)
field(:search_distance, :float, virtual: true)
field(:search_rank, :float, virtual: true)
field(:tags, {:array, :string}, default: [])
field(:last_refreshed_at, :naive_datetime)
has_many(:notifications, Notification)
@ -510,6 +510,12 @@ def get_followers(user, page \\ nil) do
{:ok, Repo.all(q)}
end
def get_followers_ids(user, page \\ nil) do
q = get_followers_query(user, page)
Repo.all(from(u in q, select: u.id))
end
def get_friends_query(%User{id: id, following: following}, nil) do
from(
u in User,
@ -534,6 +540,12 @@ def get_friends(user, page \\ nil) do
{:ok, Repo.all(q)}
end
def get_friends_ids(user, page \\ nil) do
q = get_friends_query(user, page)
Repo.all(from(u in q, select: u.id))
end
def get_follow_requests_query(%User{} = user) do
from(
a in Activity,
@ -665,37 +677,120 @@ def get_recipients_from_activity(%Activity{recipients: to}) do
Repo.all(query)
end
def search(query, resolve \\ false) do
# strip the beginning @ off if there is a query
def search(query, resolve \\ false, for_user \\ nil) do
# Strip the beginning @ off if there is a query
query = String.trim_leading(query, "@")
if resolve do
User.get_or_fetch_by_nickname(query)
if resolve, do: User.get_or_fetch_by_nickname(query)
fts_results = do_search(fts_search_subquery(query), for_user)
{:ok, trigram_results} =
Repo.transaction(fn ->
Ecto.Adapters.SQL.query(Repo, "select set_limit(0.25)", [])
do_search(trigram_search_subquery(query), for_user)
end)
Enum.uniq_by(fts_results ++ trigram_results, & &1.id)
end
inner =
defp do_search(subquery, for_user, options \\ []) do
q =
from(
s in subquery(subquery),
order_by: [desc: s.search_rank],
limit: ^(options[:limit] || 20)
)
results =
q
|> Repo.all()
|> Enum.filter(&(&1.search_rank > 0))
boost_search_results(results, for_user)
end
defp fts_search_subquery(query) do
processed_query =
query
|> String.replace(~r/\W+/, " ")
|> String.trim()
|> String.split()
|> Enum.map(&(&1 <> ":*"))
|> Enum.join(" | ")
from(
u in User,
select_merge: %{
search_distance:
search_rank:
fragment(
"? <-> (? || coalesce(?, ''))",
"""
ts_rank_cd(
setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') ||
setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B'),
to_tsquery('simple', ?),
32
)
""",
u.nickname,
u.name,
^processed_query
)
},
where:
fragment(
"""
(setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') ||
setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B')) @@ to_tsquery('simple', ?)
""",
u.nickname,
u.name,
^processed_query
)
)
end
defp trigram_search_subquery(query) do
from(
u in User,
select_merge: %{
search_rank:
fragment(
"similarity(?, trim(? || ' ' || coalesce(?, '')))",
^query,
u.nickname,
u.name
)
},
where: not is_nil(u.nickname)
where: fragment("trim(? || ' ' || coalesce(?, '')) % ?", u.nickname, u.name, ^query)
)
end
q =
from(
s in subquery(inner),
order_by: s.search_distance,
limit: 20
defp boost_search_results(results, nil), do: results
defp boost_search_results(results, for_user) do
friends_ids = get_friends_ids(for_user)
followers_ids = get_followers_ids(for_user)
Enum.map(
results,
fn u ->
search_rank_coef =
cond do
u.id in friends_ids ->
1.2
u.id in followers_ids ->
1.1
true ->
1
end
Map.put(u, :search_rank, u.search_rank * search_rank_coef)
end
)
Repo.all(q)
|> Enum.sort_by(&(-&1.search_rank))
end
def blocks_import(%User{} = blocker, blocked_identifiers) when is_list(blocked_identifiers) do

View file

@ -771,7 +771,7 @@ def status_search(user, query) do
end
def search2(%{assigns: %{user: user}} = conn, %{"q" => query} = params) do
accounts = User.search(query, params["resolve"] == "true")
accounts = User.search(query, params["resolve"] == "true", user)
statuses = status_search(user, query)
@ -795,7 +795,7 @@ def search2(%{assigns: %{user: user}} = conn, %{"q" => query} = params) do
end
def search(%{assigns: %{user: user}} = conn, %{"q" => query} = params) do
accounts = User.search(query, params["resolve"] == "true")
accounts = User.search(query, params["resolve"] == "true", user)
statuses = status_search(user, query)
@ -816,7 +816,7 @@ def search(%{assigns: %{user: user}} = conn, %{"q" => query} = params) do
end
def account_search(%{assigns: %{user: user}} = conn, %{"q" => query} = params) do
accounts = User.search(query, params["resolve"] == "true")
accounts = User.search(query, params["resolve"] == "true", user)
res = AccountView.render("accounts.json", users: accounts, for: user, as: :user)

View file

@ -675,7 +675,7 @@ def search(%{assigns: %{user: user}} = conn, %{"q" => _query} = params) do
end
def search_user(%{assigns: %{user: user}} = conn, %{"query" => query}) do
users = User.search(query, true)
users = User.search(query, true, user)
conn
|> put_view(UserView)

View file

@ -0,0 +1,17 @@
defmodule Pleroma.Repo.Migrations.CreateUserFtsIndex do
use Ecto.Migration
def change do
create index(
:users,
[
"""
(setweight(to_tsvector('simple', regexp_replace(nickname, '\\W', ' ', 'g')), 'A') ||
setweight(to_tsvector('simple', regexp_replace(coalesce(name, ''), '\\W', ' ', 'g')), 'B'))
"""
],
name: :users_fts_index,
using: :gin
)
end
end

View file

@ -0,0 +1,22 @@
defmodule Pleroma.Repo.Migrations.FixUserTrigramIndex do
use Ecto.Migration
def up do
drop_if_exists(index(:users, [], name: :users_trigram_index))
create(
index(:users, ["(trim(nickname || ' ' || coalesce(name, ''))) gist_trgm_ops"],
name: :users_trigram_index,
using: :gist
)
)
end
def down do
drop_if_exists(index(:users, [], name: :users_trigram_index))
create(
index(:users, ["(nickname || name) gist_trgm_ops"], name: :users_trigram_index, using: :gist)
)
end
end

View file

@ -775,14 +775,61 @@ test "User.delete() plugs any possible zombie objects" do
end
describe "User.search" do
test "finds a user, ranking by similarity" do
_user = insert(:user, %{name: "lain"})
_user_two = insert(:user, %{name: "ean"})
_user_three = insert(:user, %{name: "ebn", nickname: "lain@mastodon.social"})
user_four = insert(:user, %{nickname: "lain@pleroma.soykaf.com"})
test "finds a user by full or partial nickname" do
user = insert(:user, %{nickname: "john"})
assert user_four ==
User.search("lain@ple") |> List.first() |> Map.put(:search_distance, nil)
Enum.each(["john", "jo", "j"], fn query ->
assert user == User.search(query) |> List.first() |> Map.put(:search_rank, nil)
end)
end
test "finds a user by full or partial name" do
user = insert(:user, %{name: "John Doe"})
Enum.each(["John Doe", "JOHN", "doe", "j d", "j", "d"], fn query ->
assert user == User.search(query) |> List.first() |> Map.put(:search_rank, nil)
end)
end
test "finds users, preferring nickname matches over name matches" do
u1 = insert(:user, %{name: "lain", nickname: "nick1"})
u2 = insert(:user, %{nickname: "lain", name: "nick1"})
assert [u2.id, u1.id] == Enum.map(User.search("lain"), & &1.id)
end
test "finds users, considering density of matched tokens" do
u1 = insert(:user, %{name: "Bar Bar plus Word Word"})
u2 = insert(:user, %{name: "Word Word Bar Bar Bar"})
assert [u2.id, u1.id] == Enum.map(User.search("bar word"), & &1.id)
end
test "finds users, ranking by similarity" do
u1 = insert(:user, %{name: "lain"})
_u2 = insert(:user, %{name: "ean"})
u3 = insert(:user, %{name: "ebn", nickname: "lain@mastodon.social"})
u4 = insert(:user, %{nickname: "lain@pleroma.soykaf.com"})
assert [u4.id, u3.id, u1.id] == Enum.map(User.search("lain@ple"), & &1.id)
end
test "finds users, handling misspelled requests" do
u1 = insert(:user, %{name: "lain"})
assert [u1.id] == Enum.map(User.search("laiin"), & &1.id)
end
test "finds users, boosting ranks of friends and followers" do
u1 = insert(:user)
u2 = insert(:user, %{name: "Doe"})
follower = insert(:user, %{name: "Doe"})
friend = insert(:user, %{name: "Doe"})
{:ok, follower} = User.follow(follower, u1)
{:ok, u1} = User.follow(u1, friend)
assert [friend.id, follower.id, u2.id] == Enum.map(User.search("doe", false, u1), & &1.id)
end
test "finds a user whose name is nil" do
@ -792,7 +839,15 @@ test "finds a user whose name is nil" do
assert user_two ==
User.search("lain@pleroma.soykaf.com")
|> List.first()
|> Map.put(:search_distance, nil)
|> Map.put(:search_rank, nil)
end
test "does not yield false-positive matches" do
insert(:user, %{name: "John Doe"})
Enum.each(["mary", "a", ""], fn query ->
assert [] == User.search(query)
end)
end
end

View file

@ -1655,16 +1655,16 @@ test "it denies a friend request" do
describe "GET /api/pleroma/search_user" do
test "it returns users, ordered by similarity", %{conn: conn} do
user = insert(:user, %{name: "eal"})
user_two = insert(:user, %{name: "ean"})
user_three = insert(:user, %{name: "ebn"})
user_two = insert(:user, %{name: "eal me"})
_user_three = insert(:user, %{name: "zzz"})
resp =
conn
|> get(twitter_api_search__path(conn, :search_user), query: "eal")
|> get(twitter_api_search__path(conn, :search_user), query: "eal me")
|> json_response(200)
assert length(resp) == 3
assert [user.id, user_two.id, user_three.id] == Enum.map(resp, fn %{"id" => id} -> id end)
assert length(resp) == 2
assert [user_two.id, user.id] == Enum.map(resp, fn %{"id" => id} -> id end)
end
end