Search through users and hashtags as well

This commit is contained in:
FloatingGhost 2021-12-14 13:53:46 +00:00
parent a85bf5929b
commit 703f53c08f
9 changed files with 280 additions and 71 deletions

View file

@ -26,7 +26,7 @@ def run(["import", "activities" | _rest]) do
def run(["import", "users" | _rest]) do def run(["import", "users" | _rest]) do
start_pleroma() start_pleroma()
from(u in User, where: not ilike(u.ap_id, "%/relay")) from(u in User, where: u.nickname not in ["internal.fetch", "relay"])
|> get_all(:users) |> get_all(:users)
end end

View file

@ -6,7 +6,8 @@ def encode(%{actor_type: "Person"} = user) do
timestamp: user.inserted_at, timestamp: user.inserted_at,
instance: URI.parse(user.ap_id).host, instance: URI.parse(user.ap_id).host,
nickname: user.nickname, nickname: user.nickname,
bio: user.bio bio: user.bio,
display_name: user.name
} }
end end
end end

View file

@ -1,24 +1,32 @@
defmodule Pleroma.Elasticsearch do defmodule Pleroma.Elasticsearch do
alias Pleroma.Activity alias Pleroma.Activity
alias Pleroma.User
alias Pleroma.Elasticsearch.DocumentMappings alias Pleroma.Elasticsearch.DocumentMappings
alias Pleroma.Config alias Pleroma.Config
require Logger
defp url do defp url do
Config.get([:elasticsearch, :url]) Config.get([:elasticsearch, :url])
end end
def put_by_id(id) do defp enabled? do
Config.get([:search, :provider]) == Pleroma.Search.Elasticsearch
end
def put_by_id(:activity, id) do
id id
|> Activity.get_by_id_with_object() |> Activity.get_by_id_with_object()
|> maybe_put_into_elasticsearch() |> maybe_put_into_elasticsearch()
end end
def maybe_put_into_elasticsearch({:ok, activity}) do def maybe_put_into_elasticsearch({:ok, item}) do
maybe_put_into_elasticsearch(activity) maybe_put_into_elasticsearch(item)
end end
def maybe_put_into_elasticsearch(%{data: %{"type" => "Create"}, object: %{data: %{"type" => "Note"}}} = activity) do def maybe_put_into_elasticsearch(
if Config.get([:search, :provider]) == Pleroma.Search.Elasticsearch do %{data: %{"type" => "Create"}, object: %{data: %{"type" => "Note"}}} = activity
) do
if enabled?() do
actor = Pleroma.Activity.user_actor(activity) actor = Pleroma.Activity.user_actor(activity)
activity activity
@ -27,27 +35,48 @@ def maybe_put_into_elasticsearch(%{data: %{"type" => "Create"}, object: %{data:
end end
end end
def maybe_put_into_elasticsearch(%User{} = user) do
if enabled?() do
put(user)
end
end
def maybe_put_into_elasticsearch(_) do def maybe_put_into_elasticsearch(_) do
{:ok, :skipped} {:ok, :skipped}
end end
def put(%Activity{} = activity) do def put(%Activity{} = activity) do
Elastix.Document.index( {:ok, _} = Elastix.Document.index(
url(), url(),
"activities", "activities",
"activity", "activity",
DocumentMappings.Activity.id(activity), DocumentMappings.Activity.id(activity),
DocumentMappings.Activity.encode(activity) DocumentMappings.Activity.encode(activity)
) )
{:ok, _} = bulk_post(
activity.object.hashtags, :hashtags
)
end
def put(%User{} = user) do
{:ok, _ } = Elastix.Document.index(
url(),
"users",
"user",
DocumentMappings.User.id(user),
DocumentMappings.User.encode(user)
)
end end
def bulk_post(data, :activities) do def bulk_post(data, :activities) do
d = d =
data data
|> Enum.filter(fn x -> |> Enum.filter(fn x ->
t = x.object t =
x.object
|> Map.get(:data, %{}) |> Map.get(:data, %{})
|> Map.get("type", "") |> Map.get("type", "")
t == "Note" t == "Note"
end) end)
|> Enum.map(fn d -> |> Enum.map(fn d ->
@ -58,7 +87,7 @@ def bulk_post(data, :activities) do
end) end)
|> List.flatten() |> List.flatten()
Elastix.Bulk.post( {:ok, %{body: %{"errors" => false}}} = Elastix.Bulk.post(
url(), url(),
d, d,
index: "activities", index: "activities",
@ -104,12 +133,54 @@ def bulk_post(data, :hashtags) do
) )
end end
def search_activities(q) do def search(:raw, index, type, q) do
Elastix.Search.search( with {:ok, raw_results} <- Elastix.Search.search(url(), index, [type], q) do
url(), results =
"activities", raw_results
["activity"], |> Map.get(:body, %{})
q |> Map.get("hits", %{})
) |> Map.get("hits", [])
{:ok, results}
else
{:error, e} ->
Logger.error(e)
{:error, e}
end
end
def search(:activities, q) do
with {:ok, results} <- search(:raw, "activities", "activity", q) do
results
|> Enum.map(fn result -> result["_id"] end)
|> Pleroma.Activity.all_by_ids_with_object()
else
e ->
Logger.error(e)
[]
end
end
def search(:users, q) do
with {:ok, results} <- search(:raw, "users", "user", q) do
results
|> Enum.map(fn result -> result["_id"] end)
|> Pleroma.User.get_all_by_ids()
else
e ->
Logger.error(e)
[]
end
end
def search(:hashtags, q) do
with {:ok, results} <- search(:raw, "hashtags", "hashtag", q) do
results
|> Enum.map(fn result -> result["_source"]["hashtag"] end)
else
e ->
Logger.error(e)
[]
end
end end
end end

View file

@ -2,79 +2,94 @@ defmodule Pleroma.Search.Elasticsearch do
@behaviour Pleroma.Search @behaviour Pleroma.Search
alias Pleroma.Web.MastodonAPI.StatusView alias Pleroma.Web.MastodonAPI.StatusView
alias Pleroma.Web.MastodonAPI.AccountView
alias Pleroma.Web.ActivityPub.Visibility alias Pleroma.Web.ActivityPub.Visibility
alias Pleroma.Search.Elasticsearch.Parsers
alias Pleroma.Web.Endpoint
defp to_es(term) when is_binary(term) do defp es_query(:activity, query) do
%{ %{
match: %{ query: %{
content: %{ bool: %{
query: term, must: Parsers.Activity.parse(query)
operator: "AND"
} }
} }
} }
end end
defp to_es({:quoted, term}), do: to_es(term) defp es_query(:user, query) do
defp to_es({:filter, ["hashtag", query]}) do
%{ %{
term: %{ query: %{
hashtags: %{ bool: %{
value: query must: Parsers.User.parse(query)
} }
} }
} }
end end
defp to_es({:filter, [field, query]}) do defp es_query(:hashtag, query) do
%{ %{
term: %{ query: %{
field => %{ bool: %{
value: query must: Parsers.Hashtag.parse(query)
} }
} }
} }
end end
defp parse(query) do
query
|> SearchParser.parse!()
|> Enum.map(&to_es/1)
end
@impl Pleroma.Search @impl Pleroma.Search
def search(%{assigns: %{user: user}} = _conn, %{q: query} = _params, _options) do def search(%{assigns: %{user: user}} = _conn, %{q: query} = _params, _options) do
q = %{ parsed_query =
query: %{ query
bool: %{ |> String.trim()
must: parse(String.trim(query)) |> SearchParser.parse!()
}
}
}
out = Pleroma.Elasticsearch.search_activities(q) activity_task =
Task.async(fn ->
q = es_query(:activity, parsed_query)
with {:ok, raw_results} <- out do Pleroma.Elasticsearch.search(:activities, q)
results =
raw_results
|> Map.get(:body, %{})
|> Map.get("hits", %{})
|> Map.get("hits", [])
|> Enum.map(fn result -> result["_id"] end)
|> Pleroma.Activity.all_by_ids_with_object()
|> Enum.filter(fn x -> Visibility.visible_for_user?(x, user) end) |> Enum.filter(fn x -> Visibility.visible_for_user?(x, user) end)
end)
user_task =
Task.async(fn ->
q = es_query(:user, parsed_query)
Pleroma.Elasticsearch.search(:users, q)
|> Enum.filter(fn x -> Pleroma.User.visible_for(x, user) == :visible end)
end)
hashtag_task =
Task.async(fn ->
q = es_query(:hashtag, parsed_query)
Pleroma.Elasticsearch.search(:hashtags, q)
end)
activity_results = Task.await(activity_task)
user_results = Task.await(user_task)
hashtag_results = Task.await(hashtag_task)
%{ %{
"accounts" => [], "accounts" =>
"hashtags" => [], AccountView.render("index.json",
users: user_results,
for: user
),
"hashtags" =>
Enum.map(hashtag_results, fn x ->
%{
url: Endpoint.url() <> "/tag/" <> x,
name: x
}
end),
"statuses" => "statuses" =>
StatusView.render("index.json", StatusView.render("index.json",
activities: results, activities: activity_results,
for: user, for: user,
as: :activity as: :activity
) )
} }
end end
end end
end

View file

@ -0,0 +1,38 @@
defmodule Pleroma.Search.Elasticsearch.Parsers.Activity do
  @moduledoc """
  Turns parsed search tokens into query clauses for activity searches.

  Recognised token shapes:

    * a binary - full-text `match` on `content`, all words required (`"AND"`)
    * `{:quoted, term}` - handled exactly like the wrapped term
    * `{:filter, ["hashtag", value]}` - exact `term` match on `hashtags`
    * `{:filter, [field, value]}` - exact `term` match on the named field
  """

  @doc """
  Converts every token in `q` into one clause map, preserving order.

  A token of any other shape raises `FunctionClauseError`.
  """
  def parse(q) do
    for token <- q, do: clause_for(token)
  end

  # Quoted terms carry no special meaning here; unwrap and convert the inner term.
  defp clause_for({:quoted, inner}), do: clause_for(inner)

  # The "hashtag" filter is stored under the plural `hashtags` key (an atom);
  # every other filter uses the field name as given by the token.
  defp clause_for({:filter, ["hashtag", value]}), do: term_clause(:hashtags, value)
  defp clause_for({:filter, [field, value]}), do: term_clause(field, value)

  defp clause_for(text) when is_binary(text) do
    %{match: %{content: %{query: text, operator: "AND"}}}
  end

  # Builds an exact-value clause for `key`.
  defp term_clause(key, value), do: %{term: %{key => %{value: value}}}
end

View file

@ -0,0 +1,30 @@
defmodule Pleroma.Search.Elasticsearch.Parsers.Hashtag do
  @moduledoc """
  Turns parsed search tokens into query clauses for hashtag searches.

  Plain terms, quoted terms and `{:filter, ["hashtag", value]}` tokens each
  become an exact `term` match on `hashtag`, with the value lowercased.
  Every other filter token is dropped from the result.
  """

  @doc """
  Converts the tokens in `q` into clause maps, preserving order and omitting
  tokens that produce no clause.

  A token of an unrecognised shape raises `FunctionClauseError`.
  """
  def parse(q), do: Enum.flat_map(q, &clauses_for/1)

  # Each token yields a list of zero or one clauses so that ignored filters
  # simply vanish when the results are flattened.
  defp clauses_for({:quoted, inner}), do: clauses_for(inner)
  defp clauses_for({:filter, ["hashtag", tag]}), do: [tag_clause(tag)]
  defp clauses_for({:filter, _other}), do: []
  defp clauses_for(text) when is_binary(text), do: [tag_clause(text)]

  # Exact match against the lowercased tag.
  defp tag_clause(tag), do: %{term: %{hashtag: %{value: String.downcase(tag)}}}
end

View file

@ -0,0 +1,53 @@
defmodule Pleroma.Search.Elasticsearch.Parsers.User do
  @moduledoc """
  Turns parsed search tokens into query clauses for user searches.

  A plain (or quoted) term becomes a `bool` clause that needs at least one of:
  all words matching in `bio`, an exact `nickname`, or all words matching in
  `display_name`. A `{:filter, ["user", value]}` token becomes an exact
  `nickname` match. Every other filter token is dropped from the result.
  """

  @doc """
  Converts the tokens in `q` into clause maps, preserving order and omitting
  tokens that produce no clause.

  A token of an unrecognised shape raises `FunctionClauseError`.
  """
  def parse(q), do: Enum.flat_map(q, &clauses_for/1)

  # Each token yields a list of zero or one clauses so that ignored filters
  # simply vanish when the results are flattened.
  defp clauses_for({:quoted, inner}), do: clauses_for(inner)
  defp clauses_for({:filter, ["user", nickname]}), do: [nickname_clause(nickname)]
  defp clauses_for({:filter, _other}), do: []

  defp clauses_for(text) when is_binary(text) do
    alternatives = [
      all_words_clause(:bio, text),
      nickname_clause(text),
      all_words_clause(:display_name, text)
    ]

    [%{bool: %{minimum_should_match: 1, should: alternatives}}]
  end

  # Exact match on the nickname field.
  defp nickname_clause(value), do: %{term: %{nickname: %{value: value}}}

  # Full-text match on `field` with the "AND" operator.
  defp all_words_clause(field, text) do
    %{match: %{field => %{query: text, operator: "AND"}}}
  end
end

View file

@ -1088,6 +1088,7 @@ def update_and_set_cache(struct, params) do
def update_and_set_cache(changeset) do def update_and_set_cache(changeset) do
with {:ok, user} <- Repo.update(changeset, stale_error_field: :id) do with {:ok, user} <- Repo.update(changeset, stale_error_field: :id) do
Pleroma.Elasticsearch.maybe_put_into_elasticsearch(user)
set_cache(user) set_cache(user)
end end
end end

View file

@ -538,7 +538,7 @@ defp add_notifications(meta, notifications) do
@impl true @impl true
def handle_after_transaction(%Pleroma.Activity{data: %{"type" => "Create"}} = activity) do def handle_after_transaction(%Pleroma.Activity{data: %{"type" => "Create"}} = activity) do
Pleroma.Elasticsearch.put_by_id(activity.id) Pleroma.Elasticsearch.put_by_id(:activity, activity.id)
end end
def handle_after_transaction(%Pleroma.Activity{}) do def handle_after_transaction(%Pleroma.Activity{}) do