From 93397fce3de54985bde3c3f260660a63157077be Mon Sep 17 00:00:00 2001 From: Egor Kislitsyn Date: Fri, 31 May 2019 16:22:13 +0700 Subject: [PATCH 1/6] Contain search for unauthenticated users --- lib/pleroma/activity.ex | 2 + lib/pleroma/activity/search.ex | 75 +++++++++++++++++++ .../mastodon_api/mastodon_api_controller.ex | 61 +-------------- test/activity_test.exs | 38 ++++++++++ 4 files changed, 117 insertions(+), 59 deletions(-) create mode 100644 lib/pleroma/activity/search.ex diff --git a/lib/pleroma/activity.ex b/lib/pleroma/activity.ex index 99589590c..6db41fe6e 100644 --- a/lib/pleroma/activity.ex +++ b/lib/pleroma/activity.ex @@ -343,4 +343,6 @@ def restrict_deactivated_users(query) do ) ) end + + defdelegate search(user, query), to: Pleroma.Activity.Search end diff --git a/lib/pleroma/activity/search.ex b/lib/pleroma/activity/search.ex new file mode 100644 index 000000000..f2fdfffe1 --- /dev/null +++ b/lib/pleroma/activity/search.ex @@ -0,0 +1,75 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2019 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Activity.Search do + alias Pleroma.Activity + alias Pleroma.Object.Fetcher + alias Pleroma.Repo + alias Pleroma.User + alias Pleroma.Web.ActivityPub.Visibility + + import Ecto.Query + + def search(user, search_query) do + index_type = if Pleroma.Config.get([:database, :rum_enabled]), do: :rum, else: :gin + + Activity + |> Activity.with_preloaded_object() + |> Activity.restrict_deactivated_users() + |> restrict_public() + |> query_with(index_type, search_query) + |> maybe_restrict_local(user) + |> Repo.all() + |> maybe_fetch(user, search_query) + end + + defp restrict_public(q) do + from([a, o] in q, + where: fragment("?->>'type' = 'Create'", a.data), + where: "https://www.w3.org/ns/activitystreams#Public" in a.recipients, + limit: 40 + ) + end + + defp query_with(q, :gin, search_query) do + from([a, o] in q, + where: + fragment( + "to_tsvector('english', ?->>'content') @@ plainto_tsquery('english', ?)", + o.data, + ^search_query + ), + order_by: [desc: :id] + ) + end + + defp query_with(q, :rum, search_query) do + from([a, o] in q, + where: + fragment( + "? @@ plainto_tsquery('english', ?)", + o.fts_content, + ^search_query + ), + order_by: [fragment("? <=> now()::date", o.inserted_at)] + ) + end + + # users can search everything + defp maybe_restrict_local(q, %User{}), do: q + + # unauthenticated users can only search local activities + defp maybe_restrict_local(q, _), do: where(q, local: true) + + defp maybe_fetch(activities, user, search_query) do + with true <- Regex.match?(~r/https?:/, search_query), + {:ok, object} <- Fetcher.fetch_object_from_id(search_query), + %Activity{} = activity <- Activity.get_create_by_object_ap_id(object.data["id"]), + true <- Visibility.visible_for_user?(activity, user) do + activities ++ [activity] + else + _ -> activities + end + end +end diff --git a/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex b/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex index d825555c6..92cd77f62 100644 --- a/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex +++ b/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex @@ -14,7 +14,6 @@ defmodule Pleroma.Web.MastodonAPI.MastodonAPIController do alias Pleroma.HTTP alias Pleroma.Notification alias Pleroma.Object - alias Pleroma.Object.Fetcher alias Pleroma.Pagination alias Pleroma.Repo alias Pleroma.ScheduledActivity @@ -1125,64 +1124,9 @@ def unsubscribe(%{assigns: %{user: user}} = conn, %{"id" => id}) do end end - def status_search_query_with_gin(q, query) do - from([a, o] in q, - where: - fragment( - "to_tsvector('english', ?->>'content') @@ plainto_tsquery('english', ?)", - o.data, - ^query - ), - order_by: [desc: :id] - ) - end - - def status_search_query_with_rum(q, query) do - from([a, o] in q, - where: - fragment( - "? @@ plainto_tsquery('english', ?)", - o.fts_content, - ^query - ), - order_by: [fragment("? <=> now()::date", o.inserted_at)] - ) - end - - def status_search(user, query) do - fetched = - if Regex.match?(~r/https?:/, query) do - with {:ok, object} <- Fetcher.fetch_object_from_id(query), - %Activity{} = activity <- Activity.get_create_by_object_ap_id(object.data["id"]), - true <- Visibility.visible_for_user?(activity, user) do - [activity] - else - _e -> [] - end - end || [] - - q = - from([a, o] in Activity.with_preloaded_object(Activity), - where: fragment("?->>'type' = 'Create'", a.data), - where: "https://www.w3.org/ns/activitystreams#Public" in a.recipients, - limit: 40 - ) - - q = - if Pleroma.Config.get([:database, :rum_enabled]) do - status_search_query_with_rum(q, query) - else - status_search_query_with_gin(q, query) - end - - Repo.all(q) ++ fetched - end - def search2(%{assigns: %{user: user}} = conn, %{"q" => query} = params) do accounts = User.search(query, resolve: params["resolve"] == "true", for_user: user) - - statuses = status_search(user, query) - + statuses = Activity.search(user, query) tags_path = Web.base_url() <> "/tag/" tags = @@ -1205,8 +1149,7 @@ def search2(%{assigns: %{user: user}} = conn, %{"q" => query} = params) do def search(%{assigns: %{user: user}} = conn, %{"q" => query} = params) do accounts = User.search(query, resolve: params["resolve"] == "true", for_user: user) - - statuses = status_search(user, query) + statuses = Activity.search(user, query) tags = query diff --git a/test/activity_test.exs b/test/activity_test.exs index 15c95502a..5260ebb9e 100644 --- a/test/activity_test.exs +++ b/test/activity_test.exs @@ -99,4 +99,42 @@ test "when association is not loaded" do assert Activity.get_bookmark(queried_activity, user) == bookmark end end + + describe "search" do + setup do + Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) + + user = insert(:user) + + params = %{ + "@context" => "https://www.w3.org/ns/activitystreams", + "actor" => "http://mastodon.example.org/users/admin", + "type" => "Create", + "id" => "http://mastodon.example.org/users/admin/activities/1", + "object" => %{ + "type" => "Note", + "content" => "find me!", + "id" => "http://mastodon.example.org/users/admin/objects/1", + "attributedTo" => "http://mastodon.example.org/users/admin" + }, + "to" => ["https://www.w3.org/ns/activitystreams#Public"] + } + + {:ok, local_activity} = Pleroma.Web.CommonAPI.post(user, %{"status" => "find me!"}) + {:ok, remote_activity} = Pleroma.Web.Federator.incoming_ap_doc(params) + %{local_activity: local_activity, remote_activity: remote_activity, user: user} + end + + test "find local and remote statuses for authenticated users", %{ + local_activity: local_activity, + remote_activity: remote_activity, + user: user + } do + assert [^remote_activity, ^local_activity] = Activity.search(user, "find me") + end + + test "find only local statuses for unauthenticated users", %{local_activity: local_activity} do + assert [^local_activity] = Activity.search(nil, "find me") + end + end end From 94b9e9d844ad47c198817e732a54ad286caa346a Mon Sep 17 00:00:00 2001 From: Egor Kislitsyn Date: Fri, 31 May 2019 16:37:33 +0700 Subject: [PATCH 2/6] Update benchmark mix task --- lib/mix/tasks/benchmark.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/mix/tasks/benchmark.ex b/lib/mix/tasks/benchmark.ex index 0fbb4dbb1..e4b1a638a 100644 --- a/lib/mix/tasks/benchmark.ex +++ b/lib/mix/tasks/benchmark.ex @@ -7,7 +7,7 @@ def run(["search"]) do Benchee.run(%{ "search" => fn -> - Pleroma.Web.MastodonAPI.MastodonAPIController.status_search(nil, "cofe") + Pleroma.Activity.search(nil, "cofe") end }) end From 17a6f81f7b2ecd42ea003dc5d81258d6bd04bfa5 Mon Sep 17 00:00:00 2001 From: Egor Kislitsyn Date: Fri, 31 May 2019 17:11:45 +0700 Subject: [PATCH 3/6] Fix tests with enabled RUM --- test/activity_test.exs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/activity_test.exs b/test/activity_test.exs index 5260ebb9e..1814e313d 100644 --- a/test/activity_test.exs +++ b/test/activity_test.exs @@ -130,7 +130,9 @@ test "find local and remote statuses for authenticated users", %{ remote_activity: remote_activity, user: user } do - assert [^remote_activity, ^local_activity] = Activity.search(user, "find me") + activities = Enum.sort_by(Activity.search(user, "find me"), & &1.id) + + assert [^local_activity, ^remote_activity] = activities end test "find only local statuses for unauthenticated users", %{local_activity: local_activity} do From 17004a0f1a227d1ba8400fcdd5d351de83b09673 Mon Sep 17 00:00:00 2001 From: Egor Kislitsyn Date: Mon, 3 Jun 2019 18:57:24 +0700 Subject: [PATCH 4/6] Create index on `activities.local` --- .../20190603115238_add_index_on_activities_local.exs | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 priv/repo/migrations/20190603115238_add_index_on_activities_local.exs diff --git a/priv/repo/migrations/20190603115238_add_index_on_activities_local.exs b/priv/repo/migrations/20190603115238_add_index_on_activities_local.exs new file mode 100644 index 000000000..89daa9705 --- /dev/null +++ b/priv/repo/migrations/20190603115238_add_index_on_activities_local.exs @@ -0,0 +1,7 @@ +defmodule Pleroma.Repo.Migrations.AddIndexOnActivitiesLocal do + use Ecto.Migration + + def change do + create(index("activities", [:local])) + end +end From 5b04f07a1ebe6763270b406aa6638336cab04a31 Mon Sep 17 00:00:00 2001 From: Egor Kislitsyn Date: Wed, 5 Jun 2019 16:34:14 +0700 Subject: [PATCH 5/6] Limit search for unauthenticated users to local users only --- lib/pleroma/user.ex | 117 +------------- lib/pleroma/user/search.ex | 145 ++++++++++++++++++ test/user_test.exs | 30 +++- .../mastodon_api_controller_test.exs | 3 + 4 files changed, 178 insertions(+), 117 deletions(-) create mode 100644 lib/pleroma/user/search.ex diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index dc534b05c..498428269 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -735,121 +735,6 @@ def get_recipients_from_activity(%Activity{recipients: to}) do |> Repo.all() end - def search(query, resolve \\ false, for_user \\ nil) do - # Strip the beginning @ off if there is a query - query = String.trim_leading(query, "@") - - if resolve, do: get_or_fetch(query) - - {:ok, results} = - Repo.transaction(fn -> - Ecto.Adapters.SQL.query(Repo, "select set_limit(0.25)", []) - Repo.all(search_query(query, for_user)) - end) - - results - end - - def search_query(query, for_user) do - fts_subquery = fts_search_subquery(query) - trigram_subquery = trigram_search_subquery(query) - union_query = from(s in trigram_subquery, union_all: ^fts_subquery) - distinct_query = from(s in subquery(union_query), order_by: s.search_type, distinct: s.id) - - from(s in subquery(boost_search_rank_query(distinct_query, for_user)), - order_by: [desc: s.search_rank], - limit: 40 - ) - end - - defp boost_search_rank_query(query, nil), do: query - - defp boost_search_rank_query(query, for_user) do - friends_ids = get_friends_ids(for_user) - followers_ids = get_followers_ids(for_user) - - from(u in subquery(query), - select_merge: %{ - search_rank: - fragment( - """ - CASE WHEN (?) THEN (?) * 1.3 - WHEN (?) THEN (?) * 1.2 - WHEN (?) THEN (?) * 1.1 - ELSE (?) END - """, - u.id in ^friends_ids and u.id in ^followers_ids, - u.search_rank, - u.id in ^friends_ids, - u.search_rank, - u.id in ^followers_ids, - u.search_rank, - u.search_rank - ) - } - ) - end - - defp fts_search_subquery(term, query \\ User) do - processed_query = - term - |> String.replace(~r/\W+/, " ") - |> String.trim() - |> String.split() - |> Enum.map(&(&1 <> ":*")) - |> Enum.join(" | ") - - from( - u in query, - select_merge: %{ - search_type: ^0, - search_rank: - fragment( - """ - ts_rank_cd( - setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') || - setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B'), - to_tsquery('simple', ?), - 32 - ) - """, - u.nickname, - u.name, - ^processed_query - ) - }, - where: - fragment( - """ - (setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') || - setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B')) @@ to_tsquery('simple', ?) - """, - u.nickname, - u.name, - ^processed_query - ) - ) - |> restrict_deactivated() - end - - defp trigram_search_subquery(term) do - from( - u in User, - select_merge: %{ - # ^1 gives 'Postgrex expected a binary, got 1' for some weird reason - search_type: fragment("?", 1), - search_rank: - fragment( - "similarity(?, trim(? || ' ' || coalesce(?, '')))", - ^term, - u.nickname, - u.name - ) - }, - where: fragment("trim(? || ' ' || coalesce(?, '')) % ?", u.nickname, u.name, ^term) - ) - |> restrict_deactivated() - end def mute(muter, %User{ap_id: ap_id}) do info_cng = @@ -1449,4 +1334,6 @@ def get_ap_ids_by_nicknames(nicknames) do ) |> Repo.all() end + + defdelegate search(query, opts \\ []), to: User.Search end diff --git a/lib/pleroma/user/search.ex b/lib/pleroma/user/search.ex new file mode 100644 index 000000000..d5b2eaa9f --- /dev/null +++ b/lib/pleroma/user/search.ex @@ -0,0 +1,145 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2019 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.User.Search do + alias Pleroma.Repo + alias Pleroma.User + import Ecto.Query + + def search(query, opts \\ []) do + resolve = Keyword.get(opts, :resolve, false) + for_user = Keyword.get(opts, :for_user) + + # Strip the beginning @ off if there is a query + query = String.trim_leading(query, "@") + + if match?(%User{}, for_user) and resolve, do: User.get_or_fetch(query) + + {:ok, results} = + Repo.transaction(fn -> + Ecto.Adapters.SQL.query(Repo, "select set_limit(0.25)", []) + + query + |> search_query(for_user) + |> Repo.all() + end) + + results + end + + defp search_query(query, for_user) do + query + |> union_query() + |> distinct_query() + |> boost_search_rank_query(for_user) + |> subquery() + |> order_by(desc: :search_rank) + |> limit(20) + |> maybe_restrict_local(for_user) + end + + defp union_query(query) do + fts_subquery = fts_search_subquery(query) + trigram_subquery = trigram_search_subquery(query) + + from(s in trigram_subquery, union_all: ^fts_subquery) + end + + defp distinct_query(q) do + from(s in subquery(q), order_by: s.search_type, distinct: s.id) + end + + # unauthenticated users can only search local activities + defp maybe_restrict_local(q, %User{}), do: q + defp maybe_restrict_local(q, _), do: where(q, [u], u.local == true) + + defp boost_search_rank_query(query, nil), do: query + + defp boost_search_rank_query(query, for_user) do + friends_ids = User.get_friends_ids(for_user) + followers_ids = User.get_followers_ids(for_user) + + from(u in subquery(query), + select_merge: %{ + search_rank: + fragment( + """ + CASE WHEN (?) THEN (?) * 1.3 + WHEN (?) THEN (?) * 1.2 + WHEN (?) THEN (?) * 1.1 + ELSE (?) END + """, + u.id in ^friends_ids and u.id in ^followers_ids, + u.search_rank, + u.id in ^friends_ids, + u.search_rank, + u.id in ^followers_ids, + u.search_rank, + u.search_rank + ) + } + ) + end + + defp fts_search_subquery(term, query \\ User) do + processed_query = + term + |> String.replace(~r/\W+/, " ") + |> String.trim() + |> String.split() + |> Enum.map(&(&1 <> ":*")) + |> Enum.join(" | ") + + from( + u in query, + select_merge: %{ + search_type: ^0, + search_rank: + fragment( + """ + ts_rank_cd( + setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') || + setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B'), + to_tsquery('simple', ?), + 32 + ) + """, + u.nickname, + u.name, + ^processed_query + ) + }, + where: + fragment( + """ + (setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') || + setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B')) @@ to_tsquery('simple', ?) + """, + u.nickname, + u.name, + ^processed_query + ) + ) + |> User.restrict_deactivated() + end + + defp trigram_search_subquery(term) do + from( + u in User, + select_merge: %{ + # ^1 gives 'Postgrex expected a binary, got 1' for some weird reason + search_type: fragment("?", 1), + search_rank: + fragment( + "similarity(?, trim(? || ' ' || coalesce(?, '')))", + ^term, + u.nickname, + u.name + ) + }, + where: fragment("trim(? || ' ' || coalesce(?, '')) % ?", u.nickname, u.name, ^term) + ) + |> User.restrict_deactivated() + end +end diff --git a/test/user_test.exs b/test/user_test.exs index d7473ef43..1a82aa6f7 100644 --- a/test/user_test.exs +++ b/test/user_test.exs @@ -1055,7 +1055,7 @@ test "finds users, ranking by similarity" do u3 = insert(:user, %{name: "ebn", nickname: "lain@mastodon.social"}) u4 = insert(:user, %{nickname: "lain@pleroma.soykaf.com"}) - assert [u4.id, u3.id, u1.id] == Enum.map(User.search("lain@ple"), & &1.id) + assert [u4.id, u3.id, u1.id] == Enum.map(User.search("lain@ple", for_user: u1), & &1.id) end test "finds users, handling misspelled requests" do @@ -1077,6 +1077,28 @@ test "finds users, boosting ranks of friends and followers" do Enum.map(User.search("doe", resolve: false, for_user: u1), & &1.id) == [] end + test "find local and remote statuses for authenticated users" do + u1 = insert(:user, %{name: "lain"}) + u2 = insert(:user, %{name: "ebn", nickname: "lain@mastodon.social", local: false}) + u3 = insert(:user, %{nickname: "lain@pleroma.soykaf.com", local: false}) + + results = + "lain" + |> User.search(for_user: u1) + |> Enum.map(& &1.id) + |> Enum.sort() + + assert [u1.id, u2.id, u3.id] == results + end + + test "find only local statuses for unauthenticated users" do + %{id: id} = insert(:user, %{name: "lain"}) + insert(:user, %{name: "ebn", nickname: "lain@mastodon.social", local: false}) + insert(:user, %{nickname: "lain@pleroma.soykaf.com", local: false}) + + assert [%{id: ^id}] = User.search("lain") + end + test "finds a user whose name is nil" do _user = insert(:user, %{name: "notamatch", nickname: "testuser@pleroma.amplifie.red"}) user_two = insert(:user, %{name: nil, nickname: "lain@pleroma.soykaf.com"}) @@ -1097,7 +1119,11 @@ test "does not yield false-positive matches" do end test "works with URIs" do - results = User.search("http://mastodon.example.org/users/admin", resolve: true) + user = insert(:user) + + results = + User.search("http://mastodon.example.org/users/admin", resolve: true, for_user: user) + result = results |> List.first() user = User.get_cached_by_ap_id("http://mastodon.example.org/users/admin") diff --git a/test/web/mastodon_api/mastodon_api_controller_test.exs b/test/web/mastodon_api/mastodon_api_controller_test.exs index 8679a083d..51c1cdfac 100644 --- a/test/web/mastodon_api/mastodon_api_controller_test.exs +++ b/test/web/mastodon_api/mastodon_api_controller_test.exs @@ -2173,8 +2173,11 @@ test "search doesn't show statuses that it shouldn't", %{conn: conn} do end test "search fetches remote accounts", %{conn: conn} do + user = insert(:user) + conn = conn + |> assign(:user, user) |> get("/api/v1/search", %{"q" => "shp@social.heldscal.la", "resolve" => "true"}) assert results = json_response(conn, 200) From 1cb245c9825febb0b83cfc361f78d132cb5d05a8 Mon Sep 17 00:00:00 2001 From: Egor Kislitsyn Date: Wed, 5 Jun 2019 16:55:17 +0700 Subject: [PATCH 6/6] Fix formatting --- lib/pleroma/user.ex | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index 498428269..cae2c14e3 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -735,7 +735,6 @@ def get_recipients_from_activity(%Activity{recipients: to}) do |> Repo.all() end - def mute(muter, %User{ap_id: ap_id}) do info_cng = muter.info