Add full text search for accounts

This commit is contained in:
noellabo 2022-04-24 04:54:49 +09:00
parent 57ddd35698
commit 8b3ef9cc81
15 changed files with 202 additions and 21 deletions

View file

@ -49,8 +49,8 @@ class AccountsIndex < Chewy::Index
sudachi_tokenizer: {
type: 'sudachi_tokenizer',
discard_punctuation: true,
resources_path: '/etc/elasticsearch',
settings_path: '/etc/elasticsearch/sudachi.json',
resources_path: '/etc/elasticsearch/sudachi',
settings_path: '/etc/elasticsearch/sudachi/sudachi.json',
},
},
@ -78,6 +78,7 @@ class AccountsIndex < Chewy::Index
field :actor_type, type: 'keyword', normalizer: 'keyword'
field :text, type: 'text', value: ->(account) { account.index_text } do
field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
field :stemmed, type: 'text', analyzer: 'sudachi_content'
end

View file

@ -11,8 +11,8 @@ class StatusesIndex < Chewy::Index
sudachi_tokenizer: {
type: 'sudachi_tokenizer',
discard_punctuation: true,
resources_path: '/etc/elasticsearch',
settings_path: '/etc/elasticsearch/sudachi.json',
resources_path: '/etc/elasticsearch/sudachi',
settings_path: '/etc/elasticsearch/sudachi/sudachi.json',
},
},
analyzer: {

View file

@ -25,6 +25,6 @@ class Api::V2::SearchController < Api::BaseController
end
def search_params
params.permit(:type, :offset, :min_id, :max_id, :account_id)
params.permit(:type, :offset, :min_id, :max_id, :account_id, :with_profiles)
end
end

View file

@ -32,7 +32,7 @@ export function submitSearch() {
const value = getState().getIn(['search', 'value']);
if (value.length === 0) {
dispatch(fetchSearchSuccess({ accounts: [], statuses: [], hashtags: [] }, ''));
dispatch(fetchSearchSuccess({ accounts: [], statuses: [], hashtags: [], profiles: [] }, ''));
return;
}
@ -43,12 +43,17 @@ export function submitSearch() {
q: value,
resolve: true,
limit: 5,
with_profiles: true,
},
}).then(response => {
if (response.data.accounts) {
dispatch(importFetchedAccounts(response.data.accounts));
}
if (response.data.profiles) {
dispatch(importFetchedAccounts(response.data.profiles));
}
if (response.data.statuses) {
dispatch(importFetchedStatuses(response.data.statuses));
dispatch(fetchAccountsFromStatuses(response.data.statuses));
@ -56,6 +61,7 @@ export function submitSearch() {
dispatch(fetchSearchSuccess(response.data, value));
dispatch(fetchRelationships(response.data.accounts.map(item => item.id)));
// `profiles` is treated as optional by the guard earlier in this handler;
// fall back to an empty list here so a response without it does not throw.
dispatch(fetchRelationships((response.data.profiles || []).map(item => item.id)));
}).catch(error => {
dispatch(fetchSearchFail(error));
});
@ -94,12 +100,17 @@ export const expandSearch = type => (dispatch, getState) => {
q: value,
type,
offset,
with_profiles: true,
},
}).then(({ data }) => {
if (data.accounts) {
dispatch(importFetchedAccounts(data.accounts));
}
if (data.profiles) {
dispatch(importFetchedAccounts(data.profiles));
}
if (data.statuses) {
dispatch(importFetchedStatuses(data.statuses));
dispatch(fetchAccountsFromStatuses(data.statuses));
@ -107,6 +118,7 @@ export const expandSearch = type => (dispatch, getState) => {
dispatch(expandSearchSuccess(data, value, type));
dispatch(fetchRelationships(data.accounts.map(item => item.id)));
// `profiles` is treated as optional by the guard earlier in this handler;
// fall back to an empty list here so a response without it does not throw.
dispatch(fetchRelationships((data.profiles || []).map(item => item.id)));
}).catch(error => {
dispatch(expandSearchFail(error));
});

View file

@ -45,6 +45,8 @@ class SearchResults extends ImmutablePureComponent {
handleLoadMoreHashtags = () => this.props.expandSearch('hashtags');
handleLoadMoreProfiles = () => this.props.expandSearch('profiles');
render () {
const { intl, results, suggestions, dismissSuggestion, searchTerm } = this.props;
@ -71,7 +73,7 @@ class SearchResults extends ImmutablePureComponent {
);
}
let accounts, statuses, hashtags;
let accounts, statuses, hashtags, profiles;
let count = 0;
if (results.get('accounts') && results.get('accounts').size > 0) {
@ -87,6 +89,19 @@ class SearchResults extends ImmutablePureComponent {
);
}
if (results.get('profiles') && results.get('profiles').size > 0) {
count += results.get('profiles').size;
profiles = (
<div className='search-results__section'>
<h5><Icon id='users' fixedWidth /><FormattedMessage id='search_results.profiles' defaultMessage='People with matching profiles' /></h5>
{results.get('profiles').map(accountId => <AccountContainer key={accountId} id={accountId} />)}
{results.get('profiles').size >= 5 && <LoadMore visible onClick={this.handleLoadMoreProfiles} />}
</div>
);
}
if (results.get('statuses') && results.get('statuses').size > 0) {
count += results.get('statuses').size;
statuses = (
@ -131,6 +146,7 @@ class SearchResults extends ImmutablePureComponent {
</div>
{accounts}
{profiles}
{statuses}
{hashtags}
</div>

View file

@ -497,6 +497,7 @@
"search_popout.tips.user": "user",
"search_results.accounts": "People",
"search_results.hashtags": "Hashtags",
"search_results.profiles": "People with matching profiles",
"search_results.statuses": "Posts",
"search_results.statuses_fts_disabled": "Searching posts by their content is not enabled on this Mastodon server.",
"search_results.total": "{count, number} {count, plural, one {result} other {results}}",

View file

@ -497,6 +497,7 @@
"search_popout.tips.user": "ユーザー",
"search_results.accounts": "人々",
"search_results.hashtags": "ハッシュタグ",
"search_results.profiles": "プロフィールに一致する人々",
"search_results.statuses": "投稿",
"search_results.statuses_fts_disabled": "このサーバーでは投稿本文の検索は利用できません。",
"search_results.total": "{count, number}件の結果",

View file

@ -44,6 +44,7 @@ export default function search(state = initialState, action) {
accounts: ImmutableList(action.results.accounts.map(item => item.id)),
statuses: ImmutableList(action.results.statuses.map(item => item.id)),
hashtags: fromJS(action.results.hashtags),
profiles: ImmutableList(action.results.profiles.map(item => item.id)),
})).set('submitted', true).set('searchTerm', action.searchTerm);
case SEARCH_EXPAND_SUCCESS:
const results = action.searchType === 'hashtags' ? fromJS(action.results.hashtags) : action.results[action.searchType].map(item => item.id);

View file

@ -0,0 +1,88 @@
# frozen_string_literal: true
# Transforms the parse tree of a search query into a Query object whose
# clauses can be applied to a search request on the account text fields.
class AccountSearchQueryTransformer < Parslet::Transform
  # A parsed query: its clauses bucketed by boolean operator.
  class Query
    attr_reader :should_clauses, :must_not_clauses, :must_clauses

    # @param clauses [Array<TermClause, PhraseClause>] clauses in query order
    def initialize(clauses)
      # group_by, not chunk: chunk only groups *consecutive* elements, so
      # `chunk(...).to_h` kept just the last run for each operator and
      # silently dropped earlier clauses in queries such as `a +b c`.
      grouped = clauses.group_by(&:operator)

      @should_clauses = grouped.fetch(:should, [])
      @must_not_clauses = grouped.fetch(:must_not, [])
      @must_clauses = grouped.fetch(:must, [])
    end

    # Adds every clause to the given search request under its operator
    # (should / must / must_not) and sets minimum_should_match to 1.
    #
    # @param search the search request to extend; it must respond to #query
    # @return the extended search request
    def apply(search)
      should_clauses.each { |clause| search = search.query.should(clause_to_query(clause)) }
      must_clauses.each { |clause| search = search.query.must(clause_to_query(clause)) }
      must_not_clauses.each { |clause| search = search.query.must_not(clause_to_query(clause)) }
      search.query.minimum_should_match(1)
    end

    private

    # Builds the query hash for a single clause.
    #
    # @param clause [TermClause, PhraseClause]
    # @return [Hash]
    # @raise [RuntimeError] when the clause is of an unknown class
    def clause_to_query(clause)
      case clause
      when TermClause
        # Terms are matched against both sub-fields of `text` at once.
        { multi_match: { type: 'most_fields', query: clause.term, fields: ['text.edge_ngram', 'text.stemmed'] } }
      when PhraseClause
        { match_phrase: { text: { query: clause.phrase } } }
      else
        raise "Unexpected clause type: #{clause}"
      end
    end
  end

  # Maps the textual operator in front of a clause to a boolean bucket.
  class Operator
    class << self
      # @param str [String, nil] '+', '-' or nil (no operator given)
      # @return [Symbol] :must, :must_not or :should
      # @raise [RuntimeError] for any other operator string
      def symbol(str)
        case str
        when '+'
          :must
        when '-'
          :must_not
        when nil
          :should
        else
          raise "Unknown operator: #{str}"
        end
      end
    end
  end

  # A single search term together with its optional prefix and operator.
  class TermClause
    attr_reader :prefix, :operator, :term

    # @param prefix [String, nil]
    # @param operator [String, nil] raw operator, converted via Operator.symbol
    # @param term [String]
    def initialize(prefix, operator, term)
      @prefix = prefix
      @operator = Operator.symbol(operator)
      @term = term
    end
  end

  # A quoted phrase together with its optional prefix and operator.
  class PhraseClause
    attr_reader :prefix, :operator, :phrase

    # @param prefix [String, nil]
    # @param operator [String, nil] raw operator, converted via Operator.symbol
    # @param phrase [String]
    def initialize(prefix, operator, phrase)
      @prefix = prefix
      @operator = Operator.symbol(operator)
      @phrase = phrase
    end
  end

  rule(clause: subtree(:clause)) do
    prefix = clause[:prefix][:term].to_s if clause[:prefix]
    operator = clause[:operator]&.to_s

    if clause[:term]
      TermClause.new(prefix, operator, clause[:term].to_s)
    elsif clause[:shortcode]
      # clause[:term] is always nil in this branch, so interpolating it
      # produced the literal "::". Read the name from :shortcode instead.
      # NOTE(review): the parser is not visible from here; both a nested
      # { term: ... } subtree and a bare value are accepted — confirm the
      # actual shape against SearchQueryParser.
      shortcode = clause[:shortcode]
      name = shortcode.is_a?(Hash) ? shortcode[:term] : shortcode
      TermClause.new(prefix, operator, ":#{name}:")
    elsif clause[:phrase]
      phrase = clause[:phrase]
      # A phrase arrives either as a list of term nodes or as a single value.
      text = phrase.is_a?(Array) ? phrase.map { |p| p[:term].to_s }.join(' ') : phrase.to_s
      PhraseClause.new(prefix, operator, text)
    else
      raise "Unexpected clause type: #{clause}"
    end
  end

  rule(query: sequence(:clauses)) { Query.new(clauses) }
end

View file

@ -1,5 +1,5 @@
# frozen_string_literal: true
class Search < ActiveModelSerializers::Model
attributes :accounts, :statuses, :hashtags
attributes :accounts, :statuses, :hashtags, :profiles
end

View file

@ -113,7 +113,7 @@ class REST::InstanceSerializer < ActiveModel::Serializer
end
def fedibird_capabilities
[
capabilities = [
:favourite_hashtag,
:favourite_domain,
:favourite_list,
@ -135,6 +135,10 @@ class REST::InstanceSerializer < ActiveModel::Serializer
:misskey_location,
:status_reference,
]
# Profile search is only performed when Chewy is enabled
# (SearchService#account_full_text_searchable? returns false otherwise),
# so advertise the capability only in that case.
capabilities << :profile_search if Chewy.enabled?
capabilities
end
private

View file

@ -4,4 +4,5 @@ class REST::SearchSerializer < ActiveModel::Serializer
has_many :accounts, serializer: REST::AccountSerializer
has_many :statuses, serializer: REST::StatusSerializer
has_many :hashtags, serializer: REST::TagSerializer
has_many :profiles, serializer: REST::AccountSerializer
end

View file

@ -1,12 +1,12 @@
# frozen_string_literal: true
class AccountFullTextSearchService < BaseService
def call(query, account, limit, options = {})
def call(query, account, options = {})
@query = query&.strip
@account = account
@options = options
@limit = limit.to_i
@offset = options[:offset].to_i
@limit = options.delete(:limit).to_i
@offset = options.delete(:offset).to_i
return if @query.blank? || @limit.zero?
@ -18,7 +18,7 @@ class AccountFullTextSearchService < BaseService
def perform_account_text_search!
definition = parsed_query.apply(AccountsIndex.filter(term: { discoverable: true }))
results = definition.limit(@limit).offset(@offset).objects.compact
results = definition.order(last_status_at: :desc).limit(@limit).offset(@offset).objects.compact
account_ids = results.map(&:id)
preloaded_relations = relations_map_for_account(@account, account_ids)
@ -39,6 +39,6 @@ class AccountFullTextSearchService < BaseService
end
def parsed_query
SearchQueryTransformer.new.apply(SearchQueryParser.new.parse(@query))
AccountSearchQueryTransformer.new.apply(SearchQueryParser.new.parse(@query))
end
end

View file

@ -14,6 +14,15 @@ class AccountSearchService < BaseService
search_service_results.compact.uniq
end
# Returns the result of from_elasticsearch(true) for the given query instead
# of loading account records.
#
# @param query [String, nil] raw query; one leading '@' is removed after stripping
# @param account [Object, nil] stored as @account
# @param options [Hash] stored as @options
def count(query, account = nil, options = {})
  @account = account
  @options = options
  # The hint is taken from the raw query, before surrounding whitespace is stripped.
  @acct_hint = query&.start_with?('@')
  @query = query&.strip&.delete_prefix('@')

  from_elasticsearch(true)
end
private
def search_service_results
@ -75,7 +84,7 @@ class AccountSearchService < BaseService
Account.search_for(terms_for_query, limit_for_non_exact_results, options[:group_only], offset)
end
def from_elasticsearch
def from_elasticsearch(count = false)
must_clauses = [{ multi_match: { query: terms_for_query, fields: likely_acct? ? %w(acct.edge_ngram acct) : %w(acct.edge_ngram acct display_name.edge_ngram display_name), type: 'most_fields', operator: 'and' } }]
should_clauses = []
@ -99,7 +108,12 @@ class AccountSearchService < BaseService
query = { bool: { must: must_clauses, should: should_clauses } }
functions = [reputation_score_function, followers_score_function, time_distance_function]
records = AccountsIndex.query(function_score: { query: query, functions: functions, boost_mode: 'multiply', score_mode: 'avg' })
queried_account_index = AccountsIndex.query(function_score: { query: query, functions: functions, boost_mode: 'multiply', score_mode: 'avg' })
return queried_account_index.count if count
records = queried_account_index
.limit(limit_for_non_exact_results)
.offset(offset)
.objects

View file

@ -8,6 +8,7 @@ class SearchService < BaseService
@limit = limit.to_i
@offset = options[:type].blank? ? 0 : options[:offset].to_i
@resolve = options[:resolve] || false
@profile = options[:with_profiles] || false
default_results.tap do |results|
next if @query.blank? || @limit.zero?
@ -18,6 +19,20 @@ class SearchService < BaseService
results[:accounts] = perform_accounts_search! if account_searchable?
results[:statuses] = perform_statuses_search! if full_text_searchable?
results[:hashtags] = perform_hashtags_search! if hashtag_searchable?
if @profile
  # Profiles were requested as their own result section.
  results[:profiles] = perform_accounts_full_text_search! if account_full_text_searchable?
elsif account_full_text_searchable?
  # Otherwise full-text matches are merged into the regular accounts list.
  accounts_count = results[:accounts].count

  if accounts_count.zero?
    # Nothing came back from the regular account search for this page, so
    # shift the offset past its matches into the full-text results. Clamp at
    # zero: the count comes from a separate query and may exceed the current
    # offset, and a negative offset is not a valid page start.
    @offset = [@offset - count_accounts_search!, 0].max
    results[:accounts] = perform_accounts_full_text_search!
  elsif accounts_count < @limit
    # Partially filled page: top it up from the start of the full-text results.
    @limit -= accounts_count
    @offset = 0
    results[:accounts] = results[:accounts].concat(perform_accounts_full_text_search!)
  end
end
end
end
end
@ -34,6 +49,23 @@ class SearchService < BaseService
)
end
# Asks AccountSearchService for the count belonging to the current query and
# account.
def count_accounts_search!
  AccountSearchService.new.count(@query, @account)
end
# Runs AccountFullTextSearchService for the current query and account,
# forwarding the current limit, resolve flag and offset.
def perform_accounts_full_text_search!
  service = AccountFullTextSearchService.new
  service.call(@query, @account, limit: @limit, resolve: @resolve, offset: @offset)
end
def perform_statuses_search!
definition = parsed_query.apply(StatusesIndex.filter(term: { searchable_by: @account.id }))
@ -68,7 +100,7 @@ class SearchService < BaseService
end
def default_results
{ accounts: [], hashtags: [], statuses: [] }
{ accounts: [], hashtags: [], statuses: [], profiles: [] }
end
def url_query?
@ -93,6 +125,12 @@ class SearchService < BaseService
statuses_search? && !@account.nil? && !((@query.start_with?('#') || @query.include?('@')) && !@query.include?(' '))
end
# Whether the full-text account search should run for the current request.
#
# Requires Chewy to be enabled, a matching search type, a non-nil @account,
# and a query that is not a single token starting with '#' or containing '@'.
def account_full_text_searchable?
  return false unless Chewy.enabled?
  # `&&` binds tighter than `||`; the parentheses only make that explicit.
  return false unless (!@profile && account_search?) || profiles_search?
  return false if @account.nil?

  tag_or_mention = @query.start_with?('#') || @query.include?('@')
  !(tag_or_mention && !@query.include?(' '))
end
def account_searchable?
account_search? && !(@query.start_with?('#') || (@query.include?('@') && @query.include?(' ')))
end
@ -113,6 +151,10 @@ class SearchService < BaseService
@options[:type].blank? || @options[:type] == 'statuses'
end
# True when no search type was requested or the requested type is 'profiles'.
def profiles_search?
  requested_type = @options[:type]
  return true if requested_type.blank?

  requested_type == 'profiles'
end
def relations_map_for_account(account, account_ids)
presenter = AccountRelationshipsPresenter.new(account_ids, account)
{