Merge branch 'rum-index' into 'develop'

Search: Use RUM index.

See merge request 
This commit is contained in:
rinpatch 2019-05-17 16:35:19 +00:00
commit d2dacadb6b
6 changed files with 105 additions and 12 deletions
.gitlab-ci.yml
config
docs
lib/pleroma/web/mastodon_api
priv/repo/optional_migrations/rum_indexing

View file

@ -45,7 +45,8 @@ docs-build:
unit-testing: unit-testing:
stage: test stage: test
services: services:
- name: postgres:9.6.2 - name: lainsoykaf/postgres-with-rum
alias: postgres
command: ["postgres", "-c", "fsync=off", "-c", "synchronous_commit=off", "-c", "full_page_writes=off"] command: ["postgres", "-c", "fsync=off", "-c", "synchronous_commit=off", "-c", "full_page_writes=off"]
script: script:
- mix deps.get - mix deps.get
@ -54,6 +55,21 @@ unit-testing:
- mix test --trace --preload-modules - mix test --trace --preload-modules
- mix coveralls - mix coveralls
# Runs the test suite a second time with RUM-backed search enabled.
# Uses a PostgreSQL image that ships the RUM extension, applies the regular
# migrations and then the optional RUM indexing migrations before testing.
unit-testing-rum:
stage: test
services:
- name: lainsoykaf/postgres-with-rum
alias: postgres
# Durability-related settings are switched off; presumably to speed up the throwaway CI database.
command: ["postgres", "-c", "fsync=off", "-c", "synchronous_commit=off", "-c", "full_page_writes=off"]
variables:
# Read by the Elixir config to set `:database, rum_enabled`.
RUM_ENABLED: "true"
script:
- mix deps.get
- mix ecto.create
- mix ecto.migrate
# Optional migrations live outside the default migrations path and must be run explicitly.
- "mix ecto.migrate --migrations-path priv/repo/optional_migrations/rum_indexing/"
- mix test --trace --preload-modules
lint: lint:
stage: test stage: test
script: script:
@ -65,7 +81,6 @@ analysis:
- mix deps.get - mix deps.get
- mix credo --strict --only=warnings,todo,fixme,consistency,readability - mix credo --strict --only=warnings,todo,fixme,consistency,readability
docs-deploy: docs-deploy:
stage: deploy stage: deploy
image: alpine:3.9 image: alpine:3.9

View file

@ -465,6 +465,8 @@
token_expires_in: 600, token_expires_in: 600,
issue_new_refresh_token: true issue_new_refresh_token: true
# Default: do not use RUM indexes for full text search.
config :pleroma, :database, rum_enabled: false
config :http_signatures, config :http_signatures,
adapter: Pleroma.Signature adapter: Pleroma.Signature

View file

@ -63,6 +63,10 @@
config :pleroma, :http_security, report_uri: "https://endpoint.com" config :pleroma, :http_security, report_uri: "https://endpoint.com"
# RUM-backed search is switched on only when the RUM_ENABLED environment
# variable is exactly the string "true"; any other value (or no value) leaves it off.
rum_enabled = System.get_env("RUM_ENABLED") == "true"
config :pleroma, :database, rum_enabled: rum_enabled
# Echo the effective setting whenever this config file is evaluated.
IO.puts("RUM enabled: #{rum_enabled}")
try do try do
import_config "test.secret.exs" import_config "test.secret.exs"
rescue rescue

View file

@ -544,3 +544,18 @@ Configure OAuth 2 provider capabilities:
* `shortcode_globs`: Location of custom emoji files. `*` can be used as a wildcard. Example `["/emoji/custom/**/*.png"]` * `shortcode_globs`: Location of custom emoji files. `*` can be used as a wildcard. Example `["/emoji/custom/**/*.png"]`
* `groups`: Emojis are ordered in groups (tags). This is an array of key-value pairs where the key is the groupname and the value the location or array of locations. `*` can be used as a wildcard. Example `[Custom: ["/emoji/*.png", "/emoji/custom/*.png"]]` * `groups`: Emojis are ordered in groups (tags). This is an array of key-value pairs where the key is the groupname and the value the location or array of locations. `*` can be used as a wildcard. Example `[Custom: ["/emoji/*.png", "/emoji/custom/*.png"]]`
* `default_manifest`: Location of the JSON-manifest. This manifest contains information about the emoji-packs you can download. Currently only one manifest can be added (no arrays). * `default_manifest`: Location of the JSON-manifest. This manifest contains information about the emoji-packs you can download. Currently only one manifest can be added (no arrays).
## Database options
### RUM indexing for full text search
* `rum_enabled`: Whether RUM indexes should be used. Defaults to `false`.
RUM indexes are an alternative indexing scheme that is not included in PostgreSQL by default. While they may eventually be mainlined, for now they have to be installed as a PostgreSQL extension from https://github.com/postgrespro/rum.
Their advantage over the standard GIN indexes is that they allow efficient ordering of search results by timestamp, which makes search queries a lot faster on larger servers, by one or two orders of magnitude. They take up around 3 times as much space as GIN indexes.
To enable them, the `rum_enabled` flag has to be set and the following special migration has to be run:
`mix ecto.migrate --migrations-path priv/repo/optional_migrations/rum_indexing/`
This will probably take a long time.

View file

@ -1009,6 +1009,30 @@ def unsubscribe(%{assigns: %{user: user}} = conn, %{"id" => id}) do
end end
end end
@doc """
Restricts `q` to rows whose object content matches `query`, computing the
tsvector from the object's JSON `content` at query time.

`q` must expose the object as its second binding. Results are ordered by the
first binding's `id`, descending.
"""
def status_search_query_with_gin(q, query) do
  from([activity, object] in q,
    where:
      fragment(
        "to_tsvector('english', ?->>'content') @@ plainto_tsquery('english', ?)",
        object.data,
        ^query
      ),
    order_by: [desc: activity.id]
  )
end
@doc """
Restricts `q` to rows whose object `fts_content` column matches `query`.

`q` must expose the object as its second binding. Results are ordered by the
`<=>` distance between the object's `inserted_at` and `now()::date`.
"""
def status_search_query_with_rum(q, query) do
  from([activity, object] in q,
    where: fragment("? @@ plainto_tsquery('english', ?)", object.fts_content, ^query),
    order_by: [fragment("? <=> now()::date", object.inserted_at)]
  )
end
def status_search(user, query) do def status_search(user, query) do
fetched = fetched =
if Regex.match?(~r/https?:/, query) do if Regex.match?(~r/https?:/, query) do
@ -1022,20 +1046,19 @@ def status_search(user, query) do
end || [] end || []
q = q =
from( from([a, o] in Activity.with_preloaded_object(Activity),
[a, o] in Activity.with_preloaded_object(Activity),
where: fragment("?->>'type' = 'Create'", a.data), where: fragment("?->>'type' = 'Create'", a.data),
where: "https://www.w3.org/ns/activitystreams#Public" in a.recipients, where: "https://www.w3.org/ns/activitystreams#Public" in a.recipients,
where: limit: 20
fragment(
"to_tsvector('english', ?->>'content') @@ plainto_tsquery('english', ?)",
o.data,
^query
),
limit: 20,
order_by: [desc: :id]
) )
q =
if Pleroma.Config.get([:database, :rum_enabled]) do
status_search_query_with_rum(q, query)
else
status_search_query_with_gin(q, query)
end
Repo.all(q) ++ fetched Repo.all(q) ++ fetched
end end

View file

@ -0,0 +1,34 @@
defmodule Pleroma.Repo.Migrations.AddFtsIndexToObjectsTwo do
  @moduledoc """
  Optional migration: replaces the expression-based GIN full text index on
  `objects` with a RUM index over a dedicated `fts_content` tsvector column.

  `up/0` installs the `rum` extension (if missing), adds and backfills
  `fts_content`, installs a trigger that keeps the column in sync with
  `data->>'content'`, and builds the RUM index with `inserted_at` attached.

  `down/0` removes the RUM index, trigger, function and column, and restores
  the GIN expression index.
  """
  use Ecto.Migration

  def up do
    execute("create extension if not exists rum")

    drop_if_exists(
      index(:objects, ["(to_tsvector('english', data->>'content'))"],
        using: :gin,
        name: :objects_fts
      )
    )

    alter table(:objects) do
      add(:fts_content, :tsvector)
    end

    # Backfill the new column directly. Touching `updated_at` to fire the
    # trigger would overwrite the real modification time of every object.
    # Doing this before the index exists also avoids per-row index maintenance.
    execute("UPDATE objects SET fts_content = to_tsvector('english', data->>'content')")

    execute("""
    CREATE OR REPLACE FUNCTION objects_fts_update() RETURNS trigger AS $$
    begin
      new.fts_content := to_tsvector('english', new.data->>'content');
      return new;
    end
    $$ LANGUAGE plpgsql
    """)

    # Keeps fts_content in sync for every row written from now on.
    execute("""
    CREATE TRIGGER tsvectorupdate BEFORE INSERT OR UPDATE ON objects
    FOR EACH ROW EXECUTE PROCEDURE objects_fts_update()
    """)

    # Built last so the index is created in one pass over the populated column.
    execute(
      "create index objects_fts on objects using RUM (fts_content rum_tsvector_addon_ops, inserted_at) with (attach = 'inserted_at', to = 'fts_content');"
    )
  end

  def down do
    # `if exists` lets the rollback proceed even when part of the RUM setup
    # is already gone. The trigger is dropped before the function it calls.
    execute("drop index if exists objects_fts")
    execute("drop trigger if exists tsvectorupdate on objects")
    execute("drop function if exists objects_fts_update()")

    alter table(:objects) do
      remove(:fts_content, :tsvector)
    end

    create_if_not_exists(
      index(:objects, ["(to_tsvector('english', data->>'content'))"],
        using: :gin,
        name: :objects_fts
      )
    )
  end
end