Bump chewy from 5.2.0 to 7.2.2

This commit is contained in:
noellabo 2021-07-12 16:39:07 +09:00
parent f7d91487be
commit 8ebcbaded3
15 changed files with 166 additions and 167 deletions

View file

@ -30,7 +30,7 @@ gem 'bootsnap', '~> 1.6.0', require: false
gem 'browser' gem 'browser'
gem 'charlock_holmes', '~> 0.7.7' gem 'charlock_holmes', '~> 0.7.7'
gem 'iso-639' gem 'iso-639'
gem 'chewy', '~> 5.2' gem 'chewy', '~> 7.2'
gem 'cld3', '~> 3.4.2' gem 'cld3', '~> 3.4.2'
gem 'devise', '~> 4.8' gem 'devise', '~> 4.8'
gem 'devise-two-factor', '~> 4.0' gem 'devise-two-factor', '~> 4.0'

View file

@ -158,9 +158,9 @@ GEM
activesupport activesupport
cbor (0.5.9.6) cbor (0.5.9.6)
charlock_holmes (0.7.7) charlock_holmes (0.7.7)
chewy (5.2.0) chewy (7.2.2)
activesupport (>= 5.2) activesupport (>= 5.2)
elasticsearch (>= 2.0.0) elasticsearch (>= 7.12.0)
elasticsearch-dsl elasticsearch-dsl
chunky_png (1.4.0) chunky_png (1.4.0)
cld3 (3.4.2) cld3 (3.4.2)
@ -208,13 +208,13 @@ GEM
railties (>= 3.2) railties (>= 3.2)
e2mmap (0.1.0) e2mmap (0.1.0)
ed25519 (1.2.4) ed25519 (1.2.4)
elasticsearch (7.10.1) elasticsearch (7.13.1)
elasticsearch-api (= 7.10.1) elasticsearch-api (= 7.13.1)
elasticsearch-transport (= 7.10.1) elasticsearch-transport (= 7.13.1)
elasticsearch-api (7.10.1) elasticsearch-api (7.13.1)
multi_json multi_json
elasticsearch-dsl (0.1.9) elasticsearch-dsl (0.1.10)
elasticsearch-transport (7.10.1) elasticsearch-transport (7.13.1)
faraday (~> 1) faraday (~> 1)
multi_json multi_json
encryptor (3.0.0) encryptor (3.0.0)
@ -225,11 +225,23 @@ GEM
fabrication (2.22.0) fabrication (2.22.0)
faker (2.18.0) faker (2.18.0)
i18n (>= 1.6, < 2) i18n (>= 1.6, < 2)
faraday (1.3.0) faraday (1.5.1)
faraday-em_http (~> 1.0)
faraday-em_synchrony (~> 1.0)
faraday-excon (~> 1.1)
faraday-httpclient (~> 1.0.1)
faraday-net_http (~> 1.0) faraday-net_http (~> 1.0)
faraday-net_http_persistent (~> 1.1)
faraday-patron (~> 1.0)
multipart-post (>= 1.2, < 3) multipart-post (>= 1.2, < 3)
ruby2_keywords ruby2_keywords (>= 0.0.4)
faraday-em_http (1.0.0)
faraday-em_synchrony (1.0.0)
faraday-excon (1.1.0)
faraday-httpclient (1.0.1)
faraday-net_http (1.0.1) faraday-net_http (1.0.1)
faraday-net_http_persistent (1.1.0)
faraday-patron (1.0.0)
fast_blank (1.0.0) fast_blank (1.0.0)
fastimage (2.2.4) fastimage (2.2.4)
ffi (1.15.0) ffi (1.15.0)
@ -699,7 +711,7 @@ DEPENDENCIES
capistrano-yarn (~> 2.0) capistrano-yarn (~> 2.0)
capybara (~> 3.35) capybara (~> 3.35)
charlock_holmes (~> 0.7.7) charlock_holmes (~> 0.7.7)
chewy (~> 5.2) chewy (~> 7.2)
cld3 (~> 3.4.2) cld3 (~> 3.4.2)
climate_control (~> 0.2) climate_control (~> 0.2)
color_diff (~> 0.1) color_diff (~> 0.1)

View file

@ -1,7 +1,12 @@
# frozen_string_literal: true # frozen_string_literal: true
class AccountsIndex < Chewy::Index class AccountsIndex < Chewy::Index
settings index: { refresh_interval: '5m' }, analysis: { settings index: {
refresh_interval: '5m',
number_of_shards: 1,
number_of_replicas: 0,
},
analysis: {
analyzer: { analyzer: {
content: { content: {
tokenizer: 'whitespace', tokenizer: 'whitespace',
@ -57,30 +62,30 @@ class AccountsIndex < Chewy::Index
}, },
} }
define_type ::Account.searchable.includes(:account_stat), delete_if: ->(account) { account.destroyed? || !account.searchable? } do index_scope ::Account.searchable.includes(:account_stat), delete_if: ->(account) { account.destroyed? || !account.searchable? }
root date_detection: false do
field :id, type: 'long'
field :display_name, type: 'text', analyzer: 'content' do root date_detection: false do
field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content' field :id, type: 'long'
end
field :acct, type: 'text', analyzer: 'content', value: ->(account) { [account.username, account.domain].compact.join('@') } do field :display_name, type: 'text', analyzer: 'content' do
field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content' field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
end
field :actor_type, type: 'keyword', normalizer: 'keyword'
field :text, type: 'text', value: ->(account) { account.index_text } do
field :stemmed, type: 'text', analyzer: 'sudachi_content'
end
field :discoverable, type: 'boolean'
field :following_count, type: 'long', value: ->(account) { account.following.local.count }
field :followers_count, type: 'long', value: ->(account) { account.followers.local.count }
field :subscribing_count, type: 'long', value: ->(account) { account.subscribing.local.count }
field :last_status_at, type: 'date', value: ->(account) { account.last_status_at || account.created_at }
end end
field :acct, type: 'text', analyzer: 'content', value: ->(account) { [account.username, account.domain].compact.join('@') } do
field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
end
field :actor_type, type: 'keyword', normalizer: 'keyword'
field :text, type: 'text', value: ->(account) { account.index_text } do
field :stemmed, type: 'text', analyzer: 'sudachi_content'
end
field :discoverable, type: 'boolean'
field :following_count, type: 'long', value: ->(account) { account.following.local.count }
field :followers_count, type: 'long', value: ->(account) { account.followers.local.count }
field :subscribing_count, type: 'long', value: ->(account) { account.subscribing.local.count }
field :last_status_at, type: 'date', value: ->(account) { account.last_status_at || account.created_at }
end end
end end

View file

@ -1,17 +1,18 @@
# frozen_string_literal: true # frozen_string_literal: true
class StatusesIndex < Chewy::Index class StatusesIndex < Chewy::Index
settings index: { refresh_interval: '15m' }, analysis: { settings index: {
refresh_interval: '15m',
number_of_shards: 1,
number_of_replicas: 0,
},
analysis: {
tokenizer: { tokenizer: {
sudachi_tokenizer: { sudachi_tokenizer: {
type: 'sudachi_tokenizer', type: 'sudachi_tokenizer',
discard_punctuation: true, discard_punctuation: true,
resources_path: '/etc/elasticsearch', resources_path: '/etc/elasticsearch',
settings_path: '/etc/elasticsearch/sudachi.json', settings_path: '/etc/elasticsearch/sudachi.json',
additional_settings: {
systemDict: 'system_full.dic',
userDict: [],
},
}, },
}, },
analyzer: { analyzer: {
@ -36,41 +37,41 @@ class StatusesIndex < Chewy::Index
}, },
} }
define_type ::Status.unscoped.kept.without_reblogs.includes(:media_attachments, :preloadable_poll) do index_scope ::Status.unscoped.kept.without_reblogs.includes(:media_attachments, :preloadable_poll)
crutch :mentions do |collection|
data = ::Mention.where(status_id: collection.map(&:id)).where(account: Account.local, silent: false).pluck(:status_id, :account_id) crutch :mentions do |collection|
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) } data = ::Mention.where(status_id: collection.map(&:id)).where(account: Account.local, silent: false).pluck(:status_id, :account_id)
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
end
crutch :favourites do |collection|
data = ::Favourite.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
end
crutch :reblogs do |collection|
data = ::Status.where(reblog_of_id: collection.map(&:id)).where(account: Account.local).pluck(:reblog_of_id, :account_id)
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
end
crutch :bookmarks do |collection|
data = ::Bookmark.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
end
crutch :emoji_reactions do |collection|
data = ::EmojiReaction.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
end
root date_detection: false do
field :id, type: 'long'
field :account_id, type: 'long'
field :text, type: 'text', value: ->(status) { status.index_text } do
field :stemmed, type: 'text', analyzer: 'content'
end end
crutch :favourites do |collection| field :searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) }
data = ::Favourite.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
end
crutch :reblogs do |collection|
data = ::Status.where(reblog_of_id: collection.map(&:id)).where(account: Account.local).pluck(:reblog_of_id, :account_id)
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
end
crutch :bookmarks do |collection|
data = ::Bookmark.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
end
crutch :emoji_reactions do |collection|
data = ::EmojiReaction.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
end
root date_detection: false do
field :id, type: 'long'
field :account_id, type: 'long'
field :text, type: 'text', value: ->(status) { status.index_text } do
field :stemmed, type: 'text', analyzer: 'content'
end
field :searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) }
end
end end
end end

View file

@ -1,7 +1,12 @@
# frozen_string_literal: true # frozen_string_literal: true
class TagsIndex < Chewy::Index class TagsIndex < Chewy::Index
settings index: { refresh_interval: '15m' }, analysis: { settings index: {
refresh_interval: '15m',
number_of_shards: 1,
number_of_replicas: 0,
},
analysis: {
analyzer: { analyzer: {
content: { content: {
tokenizer: 'keyword', tokenizer: 'keyword',
@ -23,15 +28,15 @@ class TagsIndex < Chewy::Index
}, },
} }
define_type ::Tag.listable, delete_if: ->(tag) { tag.destroyed? || !tag.listable? } do index_scope ::Tag.listable, delete_if: ->(tag) { tag.destroyed? || !tag.listable? }
root date_detection: false do
field :name, type: 'text', analyzer: 'content' do
field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
end
field :reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? } root date_detection: false do
field :usage, type: 'long', value: ->(tag) { tag.history.reduce(0) { |total, day| total + day[:accounts].to_i } } field :name, type: 'text', analyzer: 'content' do
field :last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at } field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
end end
field :reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? }
field :usage, type: 'long', value: ->(tag) { tag.history.reduce(0) { |total, day| total + day[:accounts].to_i } }
field :last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at }
end end
end end

View file

@ -144,7 +144,7 @@ class Account < ApplicationRecord
delegate :chosen_languages, to: :user, prefix: false, allow_nil: true delegate :chosen_languages, to: :user, prefix: false, allow_nil: true
update_index('accounts#account', :self) update_index('accounts', :self)
def local? def local?
domain.nil? domain.nil?

View file

@ -20,5 +20,5 @@ class AccountStat < ApplicationRecord
belongs_to :account, inverse_of: :account_stat belongs_to :account, inverse_of: :account_stat
update_index('accounts#account', :account) update_index('accounts', :account)
end end

View file

@ -13,7 +13,7 @@
class Bookmark < ApplicationRecord class Bookmark < ApplicationRecord
include Paginable include Paginable
update_index('statuses#status', :status) if Chewy.enabled? update_index('statuses', :status) if Chewy.enabled?
belongs_to :account, inverse_of: :bookmarks belongs_to :account, inverse_of: :bookmarks
belongs_to :status, inverse_of: :bookmarks belongs_to :status, inverse_of: :bookmarks

View file

@ -13,7 +13,7 @@
class Favourite < ApplicationRecord class Favourite < ApplicationRecord
include Paginable include Paginable
update_index('statuses#status', :status) update_index('statuses', :status)
belongs_to :account, inverse_of: :favourites belongs_to :account, inverse_of: :favourites
belongs_to :status, inverse_of: :favourites belongs_to :status, inverse_of: :favourites

View file

@ -22,10 +22,9 @@
# application_id :bigint(8) # application_id :bigint(8)
# in_reply_to_account_id :bigint(8) # in_reply_to_account_id :bigint(8)
# poll_id :bigint(8) # poll_id :bigint(8)
# quote_id :bigint(8)
# deleted_at :datetime # deleted_at :datetime
# expires_at :datetime default(Infinity), not null # quote_id :bigint(8)
# expires_action :integer default("delete"), not null # expired_at :datetime
# #
class Status < ApplicationRecord class Status < ApplicationRecord
@ -48,7 +47,7 @@ class Status < ApplicationRecord
attr_accessor :circle attr_accessor :circle
update_index('statuses#status', :proper) update_index('statuses', :proper)
enum visibility: [:public, :unlisted, :private, :direct, :limited, :mutual], _suffix: :visibility enum visibility: [:public, :unlisted, :private, :direct, :limited, :mutual], _suffix: :visibility
enum expires_action: [:delete, :hint], _prefix: :expires enum expires_action: [:delete, :hint], _prefix: :expires

View file

@ -43,7 +43,7 @@ class Tag < ApplicationRecord
before_save :set_unlistable, if: :force_unlistable? before_save :set_unlistable, if: :force_unlistable?
update_index('tags#tag', :self) update_index('tags', :self)
def to_param def to_param
name name

View file

@ -35,7 +35,7 @@ class BatchedRemoveStatusService < BaseService
# Since we skipped all callbacks, we also need to manually # Since we skipped all callbacks, we also need to manually
# deindex the statuses # deindex the statuses
Chewy.strategy.current.update(StatusesIndex::Status, statuses_and_reblogs) if Chewy.enabled? Chewy.strategy.current.update(StatusesIndex, statuses_and_reblogs) if Chewy.enabled?
return if options[:skip_side_effects] return if options[:skip_side_effects]

View file

@ -190,7 +190,7 @@ class DeleteAccountService < BaseService
@account.favourites.in_batches do |favourites| @account.favourites.in_batches do |favourites|
ids = favourites.pluck(:status_id) ids = favourites.pluck(:status_id)
StatusStat.where(status_id: ids).update_all('favourites_count = GREATEST(0, favourites_count - 1)') StatusStat.where(status_id: ids).update_all('favourites_count = GREATEST(0, favourites_count - 1)')
Chewy.strategy.current.update(StatusesIndex::Status, ids) if Chewy.enabled? Chewy.strategy.current.update(StatusesIndex, ids) if Chewy.enabled?
Rails.cache.delete_multi(ids.map { |id| "statuses/#{id}" }) Rails.cache.delete_multi(ids.map { |id| "statuses/#{id}" })
favourites.delete_all favourites.delete_all
end end
@ -198,14 +198,14 @@ class DeleteAccountService < BaseService
def purge_bookmarks! def purge_bookmarks!
@account.bookmarks.in_batches do |bookmarks| @account.bookmarks.in_batches do |bookmarks|
Chewy.strategy.current.update(StatusesIndex::Status, bookmarks.pluck(:status_id)) if Chewy.enabled? Chewy.strategy.current.update(StatusesIndex, bookmarks.pluck(:status_id)) if Chewy.enabled?
bookmarks.delete_all bookmarks.delete_all
end end
end end
def purge_reactions! def purge_reactions!
@account.emoji_reactions.in_batches do |reactions| @account.emoji_reactions.in_batches do |reactions|
Chewy.strategy.current.update(StatusesIndex::Status, reactions.pluck(:status_id)) if Chewy.enabled? Chewy.strategy.current.update(StatusesIndex, reactions.pluck(:status_id)) if Chewy.enabled?
reactions.delete_all reactions.delete_all
end end
end end

View file

@ -33,23 +33,3 @@ end
# Mastodon is run with hidden services enabled, because # Mastodon is run with hidden services enabled, because
# ElasticSearch is *not* supposed to be accessed through a proxy # ElasticSearch is *not* supposed to be accessed through a proxy
Faraday.ignore_env_proxy = true Faraday.ignore_env_proxy = true
# Elasticsearch 7.x workaround
Elasticsearch::Transport::Client.prepend Module.new {
def search(arguments = {})
arguments[:rest_total_hits_as_int] = true
super arguments
end
}
Elasticsearch::API::Indices::IndicesClient.prepend Module.new {
def create(arguments = {})
arguments[:include_type_name] = true
super arguments
end
def put_mapping(arguments = {})
arguments[:include_type_name] = true
super arguments
end
}

View file

@ -65,9 +65,7 @@ module Mastodon
# Estimate the amount of data that has to be imported first # Estimate the amount of data that has to be imported first
indices.each do |index| indices.each do |index|
index.types.each do |type| progress.total = (progress.total || 0) + index.adapter.default_scope.count
progress.total = (progress.total || 0) + type.adapter.default_scope.count
end
end end
# Now import all the actual data. Mind that unlike chewy:sync, we don't # Now import all the actual data. Mind that unlike chewy:sync, we don't
@ -80,67 +78,66 @@ module Mastodon
batch_size = 1_000 batch_size = 1_000
slice_size = (batch_size / options[:concurrency]).ceil slice_size = (batch_size / options[:concurrency]).ceil
index.types.each do |type| index.adapter.default_scope.reorder(nil).find_in_batches(batch_size: batch_size) do |batch|
type.adapter.default_scope.reorder(nil).find_in_batches(batch_size: batch_size) do |batch| futures = []
futures = []
batch.each_slice(slice_size) do |records| batch.each_slice(slice_size) do |records|
futures << Concurrent::Future.execute(executor: pool) do futures << Concurrent::Future.execute(executor: pool) do
begin begin
if !progress.total.nil? && progress.progress + records.size > progress.total if !progress.total.nil? && progress.progress + records.size > progress.total
# The number of items has changed between start and now, # The number of items has changed between start and now,
# since there is no good way to predict the final count from # since there is no good way to predict the final count from
# here, just change the progress bar to an indeterminate one # here, just change the progress bar to an indeterminate one
progress.total = nil progress.total = nil
end end
grouped_records = nil grouped_records = nil
bulk_body = nil bulk_body = nil
index_count = 0 index_count = 0
delete_count = 0 delete_count = 0
ActiveRecord::Base.connection_pool.with_connection do ActiveRecord::Base.connection_pool.with_connection do
grouped_records = type.adapter.send(:grouped_objects, records) grouped_records = index.adapter.send(:grouped_objects, records)
bulk_body = Chewy::Type::Import::BulkBuilder.new(type, **grouped_records).bulk_body grouped_records = {to_index: grouped_records[:index] || [], delete: grouped_records[:delete] || []} unless grouped_records.has_key?(:to_index) && grouped_records.has_key?(:delete)
end bulk_body = Chewy::Index::Import::BulkBuilder.new(index, **grouped_records).bulk_body
end
index_count = grouped_records[:index].size if grouped_records.key?(:index) index_count = grouped_records[:to_index].size if grouped_records.key?(:to_index)
delete_count = grouped_records[:delete].size if grouped_records.key?(:delete) delete_count = grouped_records[:delete].size if grouped_records.key?(:delete)
# The following is an optimization for statuses specifically, since # The following is an optimization for statuses specifically, since
# we want to de-index statuses that cannot be searched by anybody, # we want to de-index statuses that cannot be searched by anybody,
# but can't use Chewy's delete_if logic because it doesn't use # but can't use Chewy's delete_if logic because it doesn't use
# crutches and our searchable_by logic depends on them # crutches and our searchable_by logic depends on them
if type == StatusesIndex::Status if index == StatusesIndex
bulk_body.map! do |entry| bulk_body.map! do |entry|
if entry[:index] && entry.dig(:index, :data, 'searchable_by').blank? if entry[:to_index] && entry.dig(:to_index, :data, 'searchable_by').blank?
index_count -= 1 index_count -= 1
delete_count += 1 delete_count += 1
{ delete: entry[:index].except(:data) } { delete: entry[:to_index].except(:data) }
else else
entry entry
end
end end
end end
Chewy::Type::Import::BulkRequest.new(type).perform(bulk_body)
progress.progress += records.size
added.increment(index_count)
removed.increment(delete_count)
sleep 1
rescue => e
progress.log pastel.red("Error importing #{index}: #{e}")
end end
Chewy::Index::Import::BulkRequest.new(index).perform(bulk_body)
progress.progress += records.size
added.increment(index_count)
removed.increment(delete_count)
sleep 1
rescue => e
progress.log pastel.red("Error importing #{index}: #{e}")
end end
end end
futures.map(&:value)
end end
futures.map(&:value)
end end
end end