make search provider configurable

This commit is contained in:
FloatingGhost 2021-12-12 17:23:44 +00:00
parent 345eb7b3f8
commit ed3a866f94
12 changed files with 289 additions and 287 deletions

View file

@ -851,8 +851,7 @@ config :pleroma, ConcurrentLimiter, [
{Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy, [max_running: 5, max_waiting: 5]}
]
config :pleroma, :search,
provider: :builtin
config :pleroma, :search, provider: Pleroma.Search.Builtin
# Import environment specific config. This must remain at the bottom
# of this file so it overrides the configuration defined above.

View file

@ -6,32 +6,33 @@ defmodule Mix.Tasks.Pleroma.Search do
use Mix.Task
import Mix.Pleroma
import Ecto.Query
alias Pleroma.Elasticsearch
alias Pleroma.Activity
alias Pleroma.Repo
alias Pleroma.Pagination
@shortdoc "Manages elasticsearch"
def run(["import" | rest]) do
def run(["import" | _rest]) do
start_pleroma()
query = from(a in Activity, where: not ilike(a.actor, "%/relay"))
|> Activity.with_preloaded_object
|> Activity.with_preloaded_user_actor
from(a in Activity, where: not ilike(a.actor, "%/relay"))
|> Activity.with_preloaded_object()
|> Activity.with_preloaded_user_actor()
|> get_all
end
defp get_all(query, max_id \\ nil) do
params = %{limit: 20}
params = if max_id == nil do
params
else
Map.put(params, :max_id, max_id)
end
res = query
|> Pagination.fetch_paginated(params)
params =
if max_id == nil do
params
else
Map.put(params, :max_id, max_id)
end
res =
query
|> Pagination.fetch_paginated(params)
if res == [] do
:ok
@ -42,5 +43,4 @@ defmodule Mix.Tasks.Pleroma.Search do
get_all(query, List.last(res).id)
end
end
end

View file

@ -2,13 +2,14 @@ defmodule Pleroma.Elasticsearch.DocumentMappings.Activity do
alias Pleroma.Object
def id(obj), do: obj.id
def encode(%{object: %{data: %{ "type" => "Note" }}} = activity) do
def encode(%{object: %{data: %{"type" => "Note"}}} = activity) do
%{
_timestamp: activity.inserted_at,
user: activity.user_actor.nickname,
content: activity.object.data["content"],
instance: URI.parse(activity.user_actor.ap_id).host,
hashtags: Object.hashtags(activity.object)
_timestamp: activity.inserted_at,
user: activity.user_actor.nickname,
content: activity.object.data["content"],
instance: URI.parse(activity.user_actor.ap_id).host,
hashtags: Object.hashtags(activity.object)
}
end
end

View file

@ -2,118 +2,45 @@ defmodule Pleroma.Elasticsearch do
alias Pleroma.Activity
alias Pleroma.Elasticsearch.DocumentMappings
@searchable [
"hashtag", "instance", "user"
]
defp url do
Pleroma.Config.get([:elasticsearch, :url])
end
def put(%Activity{} = activity) do
Elastix.Document.index(
url(),
"activities",
"activity",
DocumentMappings.Activity.id(activity),
DocumentMappings.Activity.encode(activity)
url(),
"activities",
"activity",
DocumentMappings.Activity.id(activity),
DocumentMappings.Activity.encode(activity)
)
end
def bulk_post(data, :activities) do
d = data
|> Enum.map(fn d ->
d =
data
|> Enum.map(fn d ->
[
%{index: %{_id: DocumentMappings.Activity.id(d)}},
DocumentMappings.Activity.encode(d)
%{index: %{_id: DocumentMappings.Activity.id(d)}},
DocumentMappings.Activity.encode(d)
]
end)
|> List.flatten()
end)
|> List.flatten()
Elastix.Bulk.post(
url(),
d,
index: "activities",
type: "activity"
url(),
d,
index: "activities",
type: "activity"
)
end
defp parse_term(t) do
if String.contains?(t, ":") and !String.starts_with?(t, "\"") do
[field, query] = String.split(t, ":")
if Enum.member?(@searchable, field) do
{field, query}
else
{"content", query}
end
else
{"content", t}
end
end
defp search_user(params, q) do
if q["user"] != nil do
params ++ [%{match: %{user: %{
query: Enum.join(q["user"], " "),
operator: "OR"
}}}]
else
params
end
end
defp search_instance(params, q) do
if q["instance"] != nil do
params ++ [%{match: %{instance: %{
query: Enum.join(q["instance"], " "),
operator: "OR"
}}}]
else
params
end
end
defp search_content(params, q) do
if q["content"] != nil do
params ++ [%{match: %{content: %{
query: Enum.join(q["content"], " "),
operator: "AND"
}}}]
else
params
end
end
defp to_es(q) do
[]
|> search_content(q)
|> search_instance(q)
|> search_user(q)
end
defp parse(query) do
String.split(query, " ")
|> Enum.map(&parse_term/1)
|> Enum.reduce(%{}, fn {field, query}, acc ->
Map.put(acc, field,
Map.get(acc, field, []) ++ [query]
)
end)
|> to_es()
end
def search(query) do
q = %{query: %{
bool: %{
must: parse(query)
}
}}
IO.inspect(q)
def search_activities(q) do
Elastix.Search.search(
url(),
"activities",
["activity"],
q
url(),
"activities",
["activity"],
q
)
end
end

12
lib/pleroma/search.ex Normal file
View file

@ -0,0 +1,12 @@
defmodule Pleroma.Search do
@type search_map :: %{
statuses: [map],
accounts: [map],
hashtags: [map]
}
@doc """
Searches for stuff
"""
@callback search(map, map, keyword) :: search_map
end

View file

@ -0,0 +1,137 @@
defmodule Pleroma.Search.Builtin do
@behaviour Pleroma.Search
alias Pleroma.Repo
alias Pleroma.User
alias Pleroma.Activity
alias Pleroma.Web.MastodonAPI.AccountView
alias Pleroma.Web.MastodonAPI.StatusView
alias Pleroma.Web.Endpoint
require Logger
@impl Pleroma.Search
def search(_conn, %{q: query} = params, options) do
version = Keyword.get(options, :version)
timeout = Keyword.get(Repo.config(), :timeout, 15_000)
default_values = %{"statuses" => [], "accounts" => [], "hashtags" => []}
default_values
|> Enum.map(fn {resource, default_value} ->
if params[:type] in [nil, resource] do
{resource, fn -> resource_search(version, resource, query, options) end}
else
{resource, fn -> default_value end}
end
end)
|> Task.async_stream(fn {resource, f} -> {resource, with_fallback(f)} end,
timeout: timeout,
on_timeout: :kill_task
)
|> Enum.reduce(default_values, fn
{:ok, {resource, result}}, acc ->
Map.put(acc, resource, result)
_error, acc ->
acc
end)
end
defp resource_search(_, "accounts", query, options) do
accounts = with_fallback(fn -> User.search(query, options) end)
AccountView.render("index.json",
users: accounts,
for: options[:for_user],
embed_relationships: options[:embed_relationships]
)
end
defp resource_search(_, "statuses", query, options) do
statuses = with_fallback(fn -> Activity.search(options[:for_user], query, options) end)
StatusView.render("index.json",
activities: statuses,
for: options[:for_user],
as: :activity
)
end
defp resource_search(:v2, "hashtags", query, options) do
tags_path = Endpoint.url() <> "/tag/"
query
|> prepare_tags(options)
|> Enum.map(fn tag ->
%{name: tag, url: tags_path <> tag}
end)
end
defp resource_search(:v1, "hashtags", query, options) do
prepare_tags(query, options)
end
defp prepare_tags(query, options) do
tags =
query
|> preprocess_uri_query()
|> String.split(~r/[^#\w]+/u, trim: true)
|> Enum.uniq_by(&String.downcase/1)
explicit_tags = Enum.filter(tags, fn tag -> String.starts_with?(tag, "#") end)
tags =
if Enum.any?(explicit_tags) do
explicit_tags
else
tags
end
tags = Enum.map(tags, fn tag -> String.trim_leading(tag, "#") end)
tags =
if Enum.empty?(explicit_tags) && !options[:skip_joined_tag] do
add_joined_tag(tags)
else
tags
end
Pleroma.Pagination.paginate(tags, options)
end
# If `query` is a URI, returns last component of its path, otherwise returns `query`
defp preprocess_uri_query(query) do
if query =~ ~r/https?:\/\// do
query
|> String.trim_trailing("/")
|> URI.parse()
|> Map.get(:path)
|> String.split("/")
|> Enum.at(-1)
else
query
end
end
defp add_joined_tag(tags) do
tags
|> Kernel.++([joined_tag(tags)])
|> Enum.uniq_by(&String.downcase/1)
end
defp joined_tag(tags) do
tags
|> Enum.map(fn tag -> String.capitalize(tag) end)
|> Enum.join()
end
defp with_fallback(f, fallback \\ []) do
try do
f.()
rescue
error ->
Logger.error("#{__MODULE__} search error: #{inspect(error)}")
fallback
end
end
end

View file

@ -0,0 +1,80 @@
defmodule Pleroma.Search.Elasticsearch do
@behaviour Pleroma.Search
alias Pleroma.Web.MastodonAPI.StatusView
defp to_es(term) when is_binary(term) do
%{
match: %{
content: %{
query: term,
operator: "AND"
}
}
}
end
defp to_es({:quoted, term}), do: to_es(term)
defp to_es({:filter, ["hashtag", query]}) do
%{
term: %{
hashtags: %{
value: query
}
}
}
end
defp to_es({:filter, [field, query]}) do
%{
term: %{
field => %{
value: query
}
}
}
end
defp parse(query) do
query
|> SearchParser.parse!()
|> Enum.map(&to_es/1)
end
@impl Pleroma.Search
def search(%{assigns: %{user: user}} = _conn, %{q: query} = _params, _options) do
q = %{
query: %{
bool: %{
must: parse(query)
}
}
}
IO.inspect(q)
out = Pleroma.Elasticsearch.search_activities(q)
with {:ok, raw_results} <- out do
results =
raw_results
|> Map.get(:body, %{})
|> Map.get("hits", %{})
|> Map.get("hits", [])
|> Enum.map(fn result -> result["_id"] end)
|> Pleroma.Activity.all_by_ids_with_object()
%{
"accounts" => [],
"hashtags" => [],
"statuses" =>
StatusView.render("index.json",
activities: results,
for: user,
as: :activity
)
}
end
end
end

View file

@ -398,8 +398,9 @@ defmodule Pleroma.Web.CommonAPI do
end
def maybe_put_into_elasticsearch({:ok, activity}) do
if Config.get([:search, :provider]) == :elasticsearch do
if Config.get([:search, :provider]) == Pleroma.Search.Elasticsearch do
actor = Pleroma.Activity.user_actor(activity)
activity
|> Map.put(:user_actor, actor)
|> Elasticsearch.put()

View file

@ -5,13 +5,9 @@
defmodule Pleroma.Web.MastodonAPI.SearchController do
use Pleroma.Web, :controller
alias Pleroma.Activity
alias Pleroma.Repo
alias Pleroma.User
alias Pleroma.Web.ControllerHelper
alias Pleroma.Web.Endpoint
alias Pleroma.Web.MastodonAPI.AccountView
alias Pleroma.Web.MastodonAPI.StatusView
alias Pleroma.Web.Plugs.OAuthScopesPlug
alias Pleroma.Web.Plugs.RateLimiter
@ -43,71 +39,13 @@ defmodule Pleroma.Web.MastodonAPI.SearchController do
def search2(conn, params), do: do_search(:v2, conn, params)
def search(conn, params), do: do_search(:v1, conn, params)
defp do_search(version, %{assigns: %{user: user}} = conn, %{q: query} = params) do
query = String.trim(query)
options = search_options(params, user)
if Pleroma.Config.get([:search, :provider]) == :elasticsearch do
elasticsearch_search(conn, query, options)
else
builtin_search(version, conn, params)
end
end
defp do_search(version, %{assigns: %{user: user}} = conn, params) do
options =
search_options(params, user)
|> Keyword.put(:version, version)
defp elasticsearch_search(%{assigns: %{user: user}} = conn, query, options) do
with {:ok, raw_results} <- Pleroma.Elasticsearch.search(query) do
results = raw_results
|> Map.get(:body, %{})
|> Map.get("hits", %{})
|> Map.get("hits", [])
|> Enum.map(fn result -> result["_id"] end)
|> Pleroma.Activity.all_by_ids_with_object()
json(
conn,
%{
accounts: [],
hashtags: [],
statuses: StatusView.render("index.json",
activities: results,
for: user,
as: :activity
)}
)
else
{:error, _} ->
conn
|> put_status(:internal_server_error)
|> json(%{error: "Search failed"})
end
end
defp builtin_search(version, %{assigns: %{user: user}} = conn, %{q: query} = params) do
options = search_options(params, user)
timeout = Keyword.get(Repo.config(), :timeout, 15_000)
default_values = %{"statuses" => [], "accounts" => [], "hashtags" => []}
result =
default_values
|> Enum.map(fn {resource, default_value} ->
if params[:type] in [nil, resource] do
{resource, fn -> resource_search(version, resource, query, options) end}
else
{resource, fn -> default_value end}
end
end)
|> Task.async_stream(fn {resource, f} -> {resource, with_fallback(f)} end,
timeout: timeout,
on_timeout: :kill_task
)
|> Enum.reduce(default_values, fn
{:ok, {resource, result}}, acc ->
Map.put(acc, resource, result)
_error, acc ->
acc
end)
json(conn, result)
search_provider = Pleroma.Config.get([:search, :provider])
json(conn, search_provider.search(conn, params, options))
end
defp search_options(params, user) do
@ -124,104 +62,6 @@ defmodule Pleroma.Web.MastodonAPI.SearchController do
|> Enum.filter(&elem(&1, 1))
end
defp resource_search(_, "accounts", query, options) do
accounts = with_fallback(fn -> User.search(query, options) end)
AccountView.render("index.json",
users: accounts,
for: options[:for_user],
embed_relationships: options[:embed_relationships]
)
end
defp resource_search(_, "statuses", query, options) do
statuses = with_fallback(fn -> Activity.search(options[:for_user], query, options) end)
StatusView.render("index.json",
activities: statuses,
for: options[:for_user],
as: :activity
)
end
defp resource_search(:v2, "hashtags", query, options) do
tags_path = Endpoint.url() <> "/tag/"
query
|> prepare_tags(options)
|> Enum.map(fn tag ->
%{name: tag, url: tags_path <> tag}
end)
end
defp resource_search(:v1, "hashtags", query, options) do
prepare_tags(query, options)
end
defp prepare_tags(query, options) do
tags =
query
|> preprocess_uri_query()
|> String.split(~r/[^#\w]+/u, trim: true)
|> Enum.uniq_by(&String.downcase/1)
explicit_tags = Enum.filter(tags, fn tag -> String.starts_with?(tag, "#") end)
tags =
if Enum.any?(explicit_tags) do
explicit_tags
else
tags
end
tags = Enum.map(tags, fn tag -> String.trim_leading(tag, "#") end)
tags =
if Enum.empty?(explicit_tags) && !options[:skip_joined_tag] do
add_joined_tag(tags)
else
tags
end
Pleroma.Pagination.paginate(tags, options)
end
defp add_joined_tag(tags) do
tags
|> Kernel.++([joined_tag(tags)])
|> Enum.uniq_by(&String.downcase/1)
end
# If `query` is a URI, returns last component of its path, otherwise returns `query`
defp preprocess_uri_query(query) do
if query =~ ~r/https?:\/\// do
query
|> String.trim_trailing("/")
|> URI.parse()
|> Map.get(:path)
|> String.split("/")
|> Enum.at(-1)
else
query
end
end
defp joined_tag(tags) do
tags
|> Enum.map(fn tag -> String.capitalize(tag) end)
|> Enum.join()
end
defp with_fallback(f, fallback \\ []) do
try do
f.()
rescue
error ->
Logger.error("#{__MODULE__} search error: #{inspect(error)}")
fallback
end
end
defp get_author(%{account_id: account_id}) when is_binary(account_id),
do: User.get_cached_by_id(account_id)

View file

@ -91,7 +91,7 @@ defmodule Pleroma.Mixfile do
defp elixirc_paths(_), do: ["lib"]
defp warnings_as_errors(:prod), do: false
defp warnings_as_errors(_), do: false
defp warnings_as_errors(_), do: true
# Specifies OAuth dependencies.
defp oauth_deps do
@ -198,6 +198,10 @@ defmodule Pleroma.Mixfile do
{:eblurhash, "~> 1.1.0"},
{:open_api_spex, "~> 3.10"},
{:elastix, ">= 0.0.0"},
{:search_parser,
git: "https://github.com/FloatingGhost/pleroma-contrib-search-parser.git",
ref: "08971a81e68686f9ac465cfb6661d51c5e4e1e7f"},
{:nimble_parsec, "~> 1.0", override: true},
# indirect dependency version override
{:plug, "~> 1.10.4", override: true},

View file

@ -83,7 +83,7 @@
"mogrify": {:hex, :mogrify, "0.9.1", "a26f107c4987477769f272bd0f7e3ac4b7b75b11ba597fd001b877beffa9c068", [:mix], [], "hexpm", "134edf189337d2125c0948bf0c228fdeef975c594317452d536224069a5b7f05"},
"mox": {:hex, :mox, "1.0.0", "4b3c7005173f47ff30641ba044eb0fe67287743eec9bd9545e37f3002b0a9f8b", [:mix], [], "hexpm", "201b0a20b7abdaaab083e9cf97884950f8a30a1350a1da403b3145e213c6f4df"},
"myhtmlex": {:git, "https://git.pleroma.social/pleroma/myhtmlex.git", "ad0097e2f61d4953bfef20fb6abddf23b87111e6", [ref: "ad0097e2f61d4953bfef20fb6abddf23b87111e6", submodules: true]},
"nimble_parsec": {:hex, :nimble_parsec, "0.5.0", "90e2eca3d0266e5c53f8fbe0079694740b9c91b6747f2b7e3c5d21966bba8300", [:mix], [], "hexpm", "5c040b8469c1ff1b10093d3186e2e10dbe483cd73d79ec017993fb3985b8a9b3"},
"nimble_parsec": {:hex, :nimble_parsec, "1.2.0", "b44d75e2a6542dcb6acf5d71c32c74ca88960421b6874777f79153bbbbd7dccc", [:mix], [], "hexpm", "52b2871a7515a5ac49b00f214e4165a40724cf99798d8e4a65e4fd64ebd002c1"},
"nimble_pool": {:hex, :nimble_pool, "0.1.0", "ffa9d5be27eee2b00b0c634eb649aa27f97b39186fec3c493716c2a33e784ec6", [:mix], [], "hexpm", "343a1eaa620ddcf3430a83f39f2af499fe2370390d4f785cd475b4df5acaf3f9"},
"nodex": {:git, "https://git.pleroma.social/pleroma/nodex", "cb6730f943cfc6aad674c92161be23a8411f15d1", [ref: "cb6730f943cfc6aad674c92161be23a8411f15d1"]},
"oban": {:hex, :oban, "2.3.4", "ec7509b9af2524d55f529cb7aee93d36131ae0bf0f37706f65d2fe707f4d9fd8", [:mix], [{:ecto_sql, ">= 3.4.3", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:postgrex, "~> 0.14", [hex: :postgrex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "c70ca0434758fd1805422ea4446af5e910ddc697c0c861549c8f0eb0cfbd2fdf"},
@ -115,6 +115,7 @@
"recon": {:hex, :recon, "2.5.1", "430ffa60685ac1efdfb1fe4c97b8767c92d0d92e6e7c3e8621559ba77598678a", [:mix, :rebar3], [], "hexpm", "5721c6b6d50122d8f68cccac712caa1231f97894bab779eff5ff0f886cb44648"},
"remote_ip": {:git, "https://git.pleroma.social/pleroma/remote_ip.git", "b647d0deecaa3acb140854fe4bda5b7e1dc6d1c8", [ref: "b647d0deecaa3acb140854fe4bda5b7e1dc6d1c8"]},
"retry": {:hex, :retry, "0.15.0", "ba6aaeba92905a396c18c299a07e638947b2ba781e914f803202bc1b9ae867c3", [:mix], [], "hexpm", "93d3310bce78c0a30cc94610684340a14adfc9136856a3f662e4d9ce6013c784"},
"search_parser": {:git, "https://github.com/FloatingGhost/pleroma-contrib-search-parser.git", "08971a81e68686f9ac465cfb6661d51c5e4e1e7f", [ref: "08971a81e68686f9ac465cfb6661d51c5e4e1e7f"]},
"sleeplocks": {:hex, :sleeplocks, "1.1.1", "3d462a0639a6ef36cc75d6038b7393ae537ab394641beb59830a1b8271faeed3", [:rebar3], [], "hexpm", "84ee37aeff4d0d92b290fff986d6a95ac5eedf9b383fadfd1d88e9b84a1c02e1"},
"ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.6", "cf344f5692c82d2cd7554f5ec8fd961548d4fd09e7d22f5b62482e5aeaebd4b0", [:make, :mix, :rebar3], [], "hexpm", "bdb0d2471f453c88ff3908e7686f86f9be327d065cc1ec16fa4540197ea04680"},
"sweet_xml": {:hex, :sweet_xml, "0.6.6", "fc3e91ec5dd7c787b6195757fbcf0abc670cee1e4172687b45183032221b66b8", [:mix], [], "hexpm", "2e1ec458f892ffa81f9f8386e3f35a1af6db7a7a37748a64478f13163a1f3573"},

View file

@ -133,13 +133,13 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
assert Parser.parse("http://example.com/oembed") ==
{:ok,
%{
"author_name" => "bees",
"author_name" => "\u202E\u202D\u202Cbees\u202C",
"author_url" => "https://www.flickr.com/photos/bees/",
"cache_age" => 3600,
"flickr_type" => "photo",
"height" => "768",
"html" =>
"<a data-flickr-embed=\"true\" href=\"https://www.flickr.com/photos/bees/2362225867/\" title=\"Bacon Lollys by bees, on Flickr\"><img src=\"https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_b.jpg\" width=\"1024\" height=\"768\" alt=\"Bacon Lollys\"></a><script async src=\"https://embedr.flickr.com/assets/client-code.js\" charset=\"utf-8\"></script>",
"<a data-flickr-embed=\"true\" href=\"https://www.flickr.com/photos/bees/2362225867/\" title=\"Bacon Lollys by \u202E\u202D\u202Cbees\u202C, on Flickr\"><img src=\"https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_b.jpg\" width=\"1024\" height=\"768\" alt=\"Bacon Lollys\"></a><script async src=\"https://embedr.flickr.com/assets/client-code.js\" charset=\"utf-8\"></script>",
"license" => "All Rights Reserved",
"license_id" => 0,
"provider_name" => "Flickr",