From 5b04f07a1ebe6763270b406aa6638336cab04a31 Mon Sep 17 00:00:00 2001 From: Egor Kislitsyn Date: Wed, 5 Jun 2019 16:34:14 +0700 Subject: [PATCH] Limit search for unauthenticated users to local users only --- lib/pleroma/user.ex | 117 +------------- lib/pleroma/user/search.ex | 145 ++++++++++++++++++ test/user_test.exs | 30 +++- .../mastodon_api_controller_test.exs | 3 + 4 files changed, 178 insertions(+), 117 deletions(-) create mode 100644 lib/pleroma/user/search.ex diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index dc534b05c..498428269 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -735,121 +735,6 @@ def get_recipients_from_activity(%Activity{recipients: to}) do |> Repo.all() end - def search(query, resolve \\ false, for_user \\ nil) do - # Strip the beginning @ off if there is a query - query = String.trim_leading(query, "@") - - if resolve, do: get_or_fetch(query) - - {:ok, results} = - Repo.transaction(fn -> - Ecto.Adapters.SQL.query(Repo, "select set_limit(0.25)", []) - Repo.all(search_query(query, for_user)) - end) - - results - end - - def search_query(query, for_user) do - fts_subquery = fts_search_subquery(query) - trigram_subquery = trigram_search_subquery(query) - union_query = from(s in trigram_subquery, union_all: ^fts_subquery) - distinct_query = from(s in subquery(union_query), order_by: s.search_type, distinct: s.id) - - from(s in subquery(boost_search_rank_query(distinct_query, for_user)), - order_by: [desc: s.search_rank], - limit: 40 - ) - end - - defp boost_search_rank_query(query, nil), do: query - - defp boost_search_rank_query(query, for_user) do - friends_ids = get_friends_ids(for_user) - followers_ids = get_followers_ids(for_user) - - from(u in subquery(query), - select_merge: %{ - search_rank: - fragment( - """ - CASE WHEN (?) THEN (?) * 1.3 - WHEN (?) THEN (?) * 1.2 - WHEN (?) THEN (?) * 1.1 - ELSE (?) END - """, - u.id in ^friends_ids and u.id in ^followers_ids, - u.search_rank, - u.id in ^friends_ids, - u.search_rank, - u.id in ^followers_ids, - u.search_rank, - u.search_rank - ) - } - ) - end - - defp fts_search_subquery(term, query \\ User) do - processed_query = - term - |> String.replace(~r/\W+/, " ") - |> String.trim() - |> String.split() - |> Enum.map(&(&1 <> ":*")) - |> Enum.join(" | ") - - from( - u in query, - select_merge: %{ - search_type: ^0, - search_rank: - fragment( - """ - ts_rank_cd( - setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') || - setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B'), - to_tsquery('simple', ?), - 32 - ) - """, - u.nickname, - u.name, - ^processed_query - ) - }, - where: - fragment( - """ - (setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') || - setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B')) @@ to_tsquery('simple', ?) - """, - u.nickname, - u.name, - ^processed_query - ) - ) - |> restrict_deactivated() - end - - defp trigram_search_subquery(term) do - from( - u in User, - select_merge: %{ - # ^1 gives 'Postgrex expected a binary, got 1' for some weird reason - search_type: fragment("?", 1), - search_rank: - fragment( - "similarity(?, trim(? || ' ' || coalesce(?, '')))", - ^term, - u.nickname, - u.name - ) - }, - where: fragment("trim(? || ' ' || coalesce(?, '')) % ?", u.nickname, u.name, ^term) - ) - |> restrict_deactivated() - end def mute(muter, %User{ap_id: ap_id}) do info_cng = @@ -1449,4 +1334,6 @@ def get_ap_ids_by_nicknames(nicknames) do ) |> Repo.all() end + + defdelegate search(query, opts \\ []), to: User.Search end diff --git a/lib/pleroma/user/search.ex b/lib/pleroma/user/search.ex new file mode 100644 index 000000000..d5b2eaa9f --- /dev/null +++ b/lib/pleroma/user/search.ex @@ -0,0 +1,145 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2019 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.User.Search do + alias Pleroma.Repo + alias Pleroma.User + import Ecto.Query + + def search(query, opts \\ []) do + resolve = Keyword.get(opts, :resolve, false) + for_user = Keyword.get(opts, :for_user) + + # Strip the beginning @ off if there is a query + query = String.trim_leading(query, "@") + + if match?(%User{}, for_user) and resolve, do: User.get_or_fetch(query) + + {:ok, results} = + Repo.transaction(fn -> + Ecto.Adapters.SQL.query(Repo, "select set_limit(0.25)", []) + + query + |> search_query(for_user) + |> Repo.all() + end) + + results + end + + defp search_query(query, for_user) do + query + |> union_query() + |> distinct_query() + |> boost_search_rank_query(for_user) + |> subquery() + |> order_by(desc: :search_rank) + |> limit(20) + |> maybe_restrict_local(for_user) + end + + defp union_query(query) do + fts_subquery = fts_search_subquery(query) + trigram_subquery = trigram_search_subquery(query) + + from(s in trigram_subquery, union_all: ^fts_subquery) + end + + defp distinct_query(q) do + from(s in subquery(q), order_by: s.search_type, distinct: s.id) + end + + # unauthenticated users can only search local activities + defp maybe_restrict_local(q, %User{}), do: q + defp maybe_restrict_local(q, _), do: where(q, [u], u.local == true) + + defp boost_search_rank_query(query, nil), do: query + + defp boost_search_rank_query(query, for_user) do + friends_ids = User.get_friends_ids(for_user) + followers_ids = User.get_followers_ids(for_user) + + from(u in subquery(query), + select_merge: %{ + search_rank: + fragment( + """ + CASE WHEN (?) THEN (?) * 1.3 + WHEN (?) THEN (?) * 1.2 + WHEN (?) THEN (?) * 1.1 + ELSE (?) END + """, + u.id in ^friends_ids and u.id in ^followers_ids, + u.search_rank, + u.id in ^friends_ids, + u.search_rank, + u.id in ^followers_ids, + u.search_rank, + u.search_rank + ) + } + ) + end + + defp fts_search_subquery(term, query \\ User) do + processed_query = + term + |> String.replace(~r/\W+/, " ") + |> String.trim() + |> String.split() + |> Enum.map(&(&1 <> ":*")) + |> Enum.join(" | ") + + from( + u in query, + select_merge: %{ + search_type: ^0, + search_rank: + fragment( + """ + ts_rank_cd( + setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') || + setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B'), + to_tsquery('simple', ?), + 32 + ) + """, + u.nickname, + u.name, + ^processed_query + ) + }, + where: + fragment( + """ + (setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') || + setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B')) @@ to_tsquery('simple', ?) + """, + u.nickname, + u.name, + ^processed_query + ) + ) + |> User.restrict_deactivated() + end + + defp trigram_search_subquery(term) do + from( + u in User, + select_merge: %{ + # ^1 gives 'Postgrex expected a binary, got 1' for some weird reason + search_type: fragment("?", 1), + search_rank: + fragment( + "similarity(?, trim(? || ' ' || coalesce(?, '')))", + ^term, + u.nickname, + u.name + ) + }, + where: fragment("trim(? || ' ' || coalesce(?, '')) % ?", u.nickname, u.name, ^term) + ) + |> User.restrict_deactivated() + end +end diff --git a/test/user_test.exs b/test/user_test.exs index d7473ef43..1a82aa6f7 100644 --- a/test/user_test.exs +++ b/test/user_test.exs @@ -1055,7 +1055,7 @@ test "finds users, ranking by similarity" do u3 = insert(:user, %{name: "ebn", nickname: "lain@mastodon.social"}) u4 = insert(:user, %{nickname: "lain@pleroma.soykaf.com"}) - assert [u4.id, u3.id, u1.id] == Enum.map(User.search("lain@ple"), & &1.id) + assert [u4.id, u3.id, u1.id] == Enum.map(User.search("lain@ple", for_user: u1), & &1.id) end test "finds users, handling misspelled requests" do @@ -1077,6 +1077,28 @@ test "finds users, boosting ranks of friends and followers" do Enum.map(User.search("doe", resolve: false, for_user: u1), & &1.id) == [] end + test "find local and remote statuses for authenticated users" do + u1 = insert(:user, %{name: "lain"}) + u2 = insert(:user, %{name: "ebn", nickname: "lain@mastodon.social", local: false}) + u3 = insert(:user, %{nickname: "lain@pleroma.soykaf.com", local: false}) + + results = + "lain" + |> User.search(for_user: u1) + |> Enum.map(& &1.id) + |> Enum.sort() + + assert [u1.id, u2.id, u3.id] == results + end + + test "find only local statuses for unauthenticated users" do + %{id: id} = insert(:user, %{name: "lain"}) + insert(:user, %{name: "ebn", nickname: "lain@mastodon.social", local: false}) + insert(:user, %{nickname: "lain@pleroma.soykaf.com", local: false}) + + assert [%{id: ^id}] = User.search("lain") + end + test "finds a user whose name is nil" do _user = insert(:user, %{name: "notamatch", nickname: "testuser@pleroma.amplifie.red"}) user_two = insert(:user, %{name: nil, nickname: "lain@pleroma.soykaf.com"}) @@ -1097,7 +1119,11 @@ test "does not yield false-positive matches" do end test "works with URIs" do - results = User.search("http://mastodon.example.org/users/admin", resolve: true) + user = insert(:user) + + results = + User.search("http://mastodon.example.org/users/admin", resolve: true, for_user: user) + result = results |> List.first() user = User.get_cached_by_ap_id("http://mastodon.example.org/users/admin") diff --git a/test/web/mastodon_api/mastodon_api_controller_test.exs b/test/web/mastodon_api/mastodon_api_controller_test.exs index 8679a083d..51c1cdfac 100644 --- a/test/web/mastodon_api/mastodon_api_controller_test.exs +++ b/test/web/mastodon_api/mastodon_api_controller_test.exs @@ -2173,8 +2173,11 @@ test "search doesn't show statuses that it shouldn't", %{conn: conn} do end test "search fetches remote accounts", %{conn: conn} do + user = insert(:user) + conn = conn + |> assign(:user, user) |> get("/api/v1/search", %{"q" => "shp@social.heldscal.la", "resolve" => "true"}) assert results = json_response(conn, 200)