Add documentation for ES search
Some checks failed
ci/woodpecker/push/release Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
ci/woodpecker/pr/release Pipeline was successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pr/test Pipeline failed
ci/woodpecker/push/test Pipeline failed
Some checks failed
ci/woodpecker/push/release Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
ci/woodpecker/pr/release Pipeline was successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pr/test Pipeline failed
ci/woodpecker/push/test Pipeline failed
This commit is contained in:
parent
635a3c223a
commit
bc9e76cce7
4 changed files with 278 additions and 0 deletions
|
@ -3472,5 +3472,90 @@
|
||||||
suggestion: [100_000]
|
suggestion: [100_000]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
# Description entry for `config :pleroma, Pleroma.Search.Elasticsearch.Cluster`.
# The top-level children cover the connection (URL, credentials) and the client
# modules; `:indexes` nests one map per index, currently only `:activities`.
%{
  group: :pleroma,
  key: Pleroma.Search.Elasticsearch.Cluster,
  type: :group,
  description: "Elasticsearch settings.",
  children: [
    %{
      key: :url,
      type: :string,
      description: "Elasticsearch URL.",
      suggestion: ["http://127.0.0.1:9200/"]
    },
    %{
      key: :username,
      type: :string,
      description: "Username to connect to ES. Set to nil if your cluster is unauthenticated.",
      suggestion: ["elastic"]
    },
    %{
      key: :password,
      type: :string,
      description: "Password to connect to ES. Set to nil if your cluster is unauthenticated.",
      suggestion: ["changeme"]
    },
    %{
      key: :api,
      type: :module,
      description:
        "The API module used by Elasticsearch. Should always be Elasticsearch.API.HTTP",
      suggestion: [Elasticsearch.API.HTTP]
    },
    %{
      key: :json_library,
      type: :module,
      description:
        "The JSON module used to encode/decode when communicating with Elasticsearch",
      suggestion: [Jason]
    },
    %{
      key: :indexes,
      type: :map,
      description: "The indices to set up in Elasticsearch",
      children: [
        %{
          key: :activities,
          type: :map,
          description: "Config for the index to use for activities",
          children: [
            %{
              key: :settings,
              type: :string,
              description:
                "Path to the file containing index settings for the activities index. Should contain a mapping.",
              suggestion: ["priv/es-mappings/activity.json"]
            },
            %{
              key: :store,
              type: :module,
              description: "The internal store module",
              suggestion: [Pleroma.Search.Elasticsearch.Store]
            },
            %{
              key: :sources,
              type: {:list, :module},
              description: "The internal types to use for this index",
              suggestion: [[Pleroma.Activity]]
            },
            %{
              key: :bulk_page_size,
              # NOTE(review): `:integer` (not `:int`) is assumed to be the type name the
              # description schema uses for whole numbers - confirm against other entries.
              type: :integer,
              description: "Size for bulk put requests, mostly used on building the index",
              suggestion: [5000]
            },
            %{
              key: :bulk_wait_interval,
              type: :integer,
              description: "Time to wait between bulk put requests (in ms)",
              suggestion: [15_000]
            }
          ]
        }
      ]
    }
  ]
}
|
||||||
]
|
]
|
||||||
|
|
|
@ -121,3 +121,43 @@ This will clear **all** the posts from the search index. Note, that deleted post
|
||||||
there is no need to actually clear the whole index, unless you want **all** of it gone. That said, the index does not hold any information
|
there is no need to actually clear the whole index, unless you want **all** of it gone. That said, the index does not hold any information
|
||||||
that cannot be re-created from the database, it should also generally be a lot smaller than the size of your database. Still, the size
|
that cannot be re-created from the database, it should also generally be a lot smaller than the size of your database. Still, the size
|
||||||
depends on the amount of text in posts.
|
depends on the amount of text in posts.
|
||||||
|
|
||||||
|
## Elasticsearch
|
||||||
|
|
||||||
|
As with meilisearch, this can be rather memory-hungry, but it is very good at what it does.
|
||||||
|
|
||||||
|
To use [elasticsearch](https://www.elastic.co/), set the search module to `Pleroma.Search.Elasticsearch`:
|
||||||
|
|
||||||
|
> config :pleroma, Pleroma.Search, module: Pleroma.Search.Elasticsearch
|
||||||
|
|
||||||
|
You then need to set the URL and authentication credentials if relevant.
|
||||||
|
|
||||||
|
> config :pleroma, Pleroma.Search.Elasticsearch.Cluster,
|
||||||
|
> url: "http://127.0.0.1:9200/",
|
||||||
|
> username: "elastic",
|
||||||
|
> password: "changeme"
|
||||||
|
|
||||||
|
### Initial indexing
|
||||||
|
|
||||||
|
After setting up the configuration, you'll want to index all of your already existing posts. Only public posts are indexed. You'll only
|
||||||
|
have to do it one time, but it might take a while, depending on the amount of posts your instance has seen.
|
||||||
|
|
||||||
|
The sequence of actions is as follows:
|
||||||
|
|
||||||
|
1. First, change the configuration to use `Pleroma.Search.Elasticsearch` as the search backend
|
||||||
|
2. Restart your instance. At this point it can be used while the search indexing is running, though search won't return anything
|
||||||
|
3. Start the initial indexing process (as described below with `index`),
|
||||||
|
and wait until the task says it sent everything from the database to index
|
||||||
|
4. Wait until the index task exits
|
||||||
|
|
||||||
|
To start the initial indexing, run the `index` command (OTP) or the `elasticsearch.build` task (from source):
|
||||||
|
|
||||||
|
=== "OTP"
|
||||||
|
```sh
|
||||||
|
./bin/pleroma_ctl search.elasticsearch index activities --cluster Pleroma.Search.Elasticsearch.Cluster
|
||||||
|
```
|
||||||
|
|
||||||
|
=== "From Source"
|
||||||
|
```sh
|
||||||
|
mix elasticsearch.build activities --cluster Pleroma.Search.Elasticsearch.Cluster
|
||||||
|
```
|
9
lib/mix/tasks/pleroma/search/elasticsearch.ex
Normal file
9
lib/mix/tasks/pleroma/search/elasticsearch.ex
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
defmodule Mix.Tasks.Pleroma.Search.Elasticsearch do
  @moduledoc """
  Mix task wrapper around `Mix.Tasks.Elasticsearch.Build`.

  The only subcommand is `index`: it starts Pleroma and then passes every
  remaining command-line argument to `Mix.Tasks.Elasticsearch.Build.run/1`.
  """

  import Mix.Pleroma

  @doc """
  Runs the task. `["index" | rest]` starts Pleroma and forwards `rest`
  unchanged to the Elasticsearch build task.
  """
  def run(["index" | build_args]) do
    # Pleroma is started first, before the build task runs.
    start_pleroma()

    Mix.Tasks.Elasticsearch.Build.run(build_args)
  end
end
|
144
lib/mix/tasks/pleroma/search/meilisearch.ex
Normal file
144
lib/mix/tasks/pleroma/search/meilisearch.ex
Normal file
|
@ -0,0 +1,144 @@
|
||||||
|
# Pleroma: A lightweight social networking server
|
||||||
|
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
|
||||||
|
defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
  @moduledoc """
  Mix tasks for managing the Meilisearch `objects` index.

  Subcommands handled by `run/1`:

    * `index` - configures ranking rules and searchable attributes, then
      streams matching notes from the database into the index
    * `clear` - deletes all documents from the index
    * `show-keys <master_key>` - prints the API keys returned by the server
    * `stats` - prints the document count and whether indexing is in progress
  """

  require Pleroma.Constants

  import Mix.Pleroma
  import Ecto.Query

  import Pleroma.Search.Meilisearch,
    only: [meili_post: 2, meili_put: 2, meili_get: 1, meili_delete!: 1]

  @doc """
  Entry point of the task; dispatches on the argument list (see the module
  documentation for the accepted subcommands).

  `index` raises when the server reports a version below 0.25.0.
  """
  def run(["index"]) do
    start_pleroma()

    {:ok, result} = meili_get("/version")
    meili_version = result["pkgVersion"]

    # The ranking rule syntax was changed but nothing about that is mentioned in the changelog
    # The message names the same lower bound the check enforces (0.25.0).
    if not Version.match?(meili_version, ">= 0.25.0") do
      raise "Meilisearch <0.25.0 not supported"
    end

    {:ok, _} =
      meili_post(
        "/indexes/objects/settings/ranking-rules",
        [
          "published:desc",
          "words",
          "exactness",
          "proximity",
          "typo",
          "attribute",
          "sort"
        ]
      )

    {:ok, _} =
      meili_post(
        "/indexes/objects/settings/searchable-attributes",
        [
          "content"
        ]
      )

    IO.puts("Created indices. Starting to insert posts.")

    chunk_size = Pleroma.Config.get([Pleroma.Search.Meilisearch, :initial_indexing_chunk_size])

    # The count and the stream both run inside this one transaction;
    # both timeouts are disabled because the run can take a long time.
    Pleroma.Repo.transaction(
      fn ->
        query =
          from(Pleroma.Object,
            # Only index public and unlisted posts which are notes and have some text
            where:
              fragment("data->>'type' = 'Note'") and
                (fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public()) or
                   fragment("data->'cc' \\? ?", ^Pleroma.Constants.as_public())),
            order_by: [desc: fragment("data->'published'")]
          )

        count = query |> Pleroma.Repo.aggregate(:count, :data)
        IO.puts("Entries to index: #{count}")

        Pleroma.Repo.stream(
          query,
          timeout: :infinity
        )
        |> Stream.map(&Pleroma.Search.Meilisearch.object_to_search_data/1)
        # Drop entries for which the conversion returned nil
        |> Stream.filter(fn o -> not is_nil(o) end)
        |> Stream.chunk_every(chunk_size)
        # Pass each chunk through unchanged while keeping a running total for the progress line
        |> Stream.transform(0, fn objects, acc ->
          new_acc = acc + Enum.count(objects)

          # Reset to the beginning of the line and rewrite it
          IO.write("\r")
          IO.write("Indexed #{new_acc} entries")

          {[objects], new_acc}
        end)
        |> Stream.each(fn objects ->
          result =
            meili_put(
              "/indexes/objects/documents",
              objects
            )

          # Failures are only reported; the remaining chunks are still sent
          with {:ok, res} <- result do
            if not Map.has_key?(res, "uid") do
              IO.puts("\nFailed to index: #{inspect(result)}")
            end
          else
            e -> IO.puts("\nFailed to index due to network error: #{inspect(e)}")
          end
        end)
        |> Stream.run()
      end,
      timeout: :infinity
    )

    IO.write("\n")
  end

  def run(["clear"]) do
    start_pleroma()

    meili_delete!("/indexes/objects/documents")
  end

  def run(["show-keys", master_key]) do
    start_pleroma()

    endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])

    # Calls Pleroma.HTTP directly so the master key given on the command line is sent as the bearer token
    {:ok, result} =
      Pleroma.HTTP.get(
        Path.join(endpoint, "/keys"),
        [{"Authorization", "Bearer #{master_key}"}]
      )

    decoded = Jason.decode!(result.body)

    if decoded["results"] do
      Enum.each(decoded["results"], fn %{"description" => desc, "key" => key} ->
        IO.puts("#{desc}: #{key}")
      end)
    else
      IO.puts("Error fetching the keys, check the master key is correct: #{inspect(decoded)}")
    end
  end

  def run(["stats"]) do
    start_pleroma()

    {:ok, result} = meili_get("/indexes/objects/stats")
    IO.puts("Number of entries: #{result["numberOfDocuments"]}")
    IO.puts("Indexing? #{result["isIndexing"]}")
  end
end
|
Loading…
Reference in a new issue