From 8ebcbaded38a984c87cbb6ca96f6645c85af21f3 Mon Sep 17 00:00:00 2001
From: noellabo
Date: Mon, 12 Jul 2021 16:39:07 +0900
Subject: [PATCH] Bump chewy from 5.2.0 to 7.2.2

---
Reviewer notes (free text in this section is not part of the commit).

What the patch does, as visible in the hunks below:
  * Raises the chewy requirement to '~> 7.2' and updates the locked
    elasticsearch / faraday gems accordingly.
  * Replaces each `define_type <scope> do ... end` wrapper with a top-level
    `index_scope <scope>` declaration; `crutch` and `root` blocks move up one
    nesting level onto the index class itself.
  * Points `update_index` and `Chewy.strategy.current.update` at the index
    ('statuses', StatusesIndex) instead of the type ('statuses#status',
    StatusesIndex::Status).
  * Sets number_of_shards: 1 and number_of_replicas: 0 on all three indices.
  * Removes the `rest_total_hits_as_int` / `include_type_name` prepended
    workarounds from config/initializers/chewy.rb.
  * Drops the `index.types.each` loop in lib/mastodon/search_cli.rb and uses
    Chewy::Index::Import::{BulkBuilder,BulkRequest}.

Amendments made to the lib/mastodon/search_cli.rb hunk during review
(the post-image blob id on its `index` line therefore no longer matches):
  * Bulk body entries are keyed by the bulk action (`:index`), not by the
    BulkBuilder keyword (`:to_index`). The status de-indexing branch checked
    `entry[:to_index]`, which never matched; it now checks `entry[:index]`.
  * `grouped_records` is normalised unconditionally, accepting either an
    `:index` or a `:to_index` key from the adapter, so a result that already
    uses `:to_index` but has no `:delete` key is no longer emptied.

NOTE(review): the app/models/status.rb hunk rewrites the schema comment
(expires_at / expires_action -> expired_at) while the model still declares
`enum expires_action`. This looks like annotation drift from a local
database rather than part of the chewy bump; confirm before merging.

NOTE(review): line breaks, indentation and alignment in this file were
reconstructed from a whitespace-collapsed copy; hunk line counts were
checked against the headers, but leading whitespace should be verified
against the target tree (or apply with --ignore-whitespace).

 Gemfile                                       |  2 +-
 Gemfile.lock                                  | 34 ++++---
 app/chewy/accounts_index.rb                   | 51 +++++-----
 app/chewy/statuses_index.rb                   | 79 +++++++--------
 app/chewy/tags_index.rb                       | 23 +++--
 app/models/account.rb                         |  2 +-
 app/models/account_stat.rb                    |  2 +-
 app/models/bookmark.rb                        |  2 +-
 app/models/favourite.rb                       |  2 +-
 app/models/status.rb                          |  7 +-
 app/models/tag.rb                             |  2 +-
 app/services/batched_remove_status_service.rb |  2 +-
 app/services/delete_account_service.rb        |  6 +-
 config/initializers/chewy.rb                  | 20 ----
 lib/mastodon/search_cli.rb                    | 99 +++++++++----------
 15 files changed, 166 insertions(+), 167 deletions(-)

diff --git a/Gemfile b/Gemfile
index 748433686..8231cd421 100644
--- a/Gemfile
+++ b/Gemfile
@@ -30,7 +30,7 @@ gem 'bootsnap', '~> 1.6.0', require: false
 gem 'browser'
 gem 'charlock_holmes', '~> 0.7.7'
 gem 'iso-639'
-gem 'chewy', '~> 5.2'
+gem 'chewy', '~> 7.2'
 gem 'cld3', '~> 3.4.2'
 gem 'devise', '~> 4.8'
 gem 'devise-two-factor', '~> 4.0'
diff --git a/Gemfile.lock b/Gemfile.lock
index 066bdf1c0..bddb19143 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -158,9 +158,9 @@ GEM
       activesupport
     cbor (0.5.9.6)
     charlock_holmes (0.7.7)
-    chewy (5.2.0)
+    chewy (7.2.2)
       activesupport (>= 5.2)
-      elasticsearch (>= 2.0.0)
+      elasticsearch (>= 7.12.0)
       elasticsearch-dsl
     chunky_png (1.4.0)
     cld3 (3.4.2)
@@ -208,13 +208,13 @@ GEM
       railties (>= 3.2)
     e2mmap (0.1.0)
     ed25519 (1.2.4)
-    elasticsearch (7.10.1)
-      elasticsearch-api (= 7.10.1)
-      elasticsearch-transport (= 7.10.1)
-    elasticsearch-api (7.10.1)
+    elasticsearch (7.13.1)
+      elasticsearch-api (= 7.13.1)
+      elasticsearch-transport (= 7.13.1)
+    elasticsearch-api (7.13.1)
       multi_json
-    elasticsearch-dsl (0.1.9)
-    elasticsearch-transport (7.10.1)
+    elasticsearch-dsl (0.1.10)
+    elasticsearch-transport (7.13.1)
       faraday (~> 1)
       multi_json
     encryptor (3.0.0)
@@ -225,11 +225,23 @@ GEM
     fabrication (2.22.0)
     faker (2.18.0)
       i18n (>= 1.6, < 2)
-    faraday (1.3.0)
+    faraday (1.5.1)
+      faraday-em_http (~> 1.0)
+      faraday-em_synchrony (~> 1.0)
+      faraday-excon (~> 1.1)
+      faraday-httpclient (~> 1.0.1)
       faraday-net_http (~> 1.0)
+      faraday-net_http_persistent (~> 1.1)
+      faraday-patron (~> 1.0)
       multipart-post (>= 1.2, < 3)
-      ruby2_keywords
+      ruby2_keywords (>= 0.0.4)
+    faraday-em_http (1.0.0)
+    faraday-em_synchrony (1.0.0)
+    faraday-excon (1.1.0)
+    faraday-httpclient (1.0.1)
     faraday-net_http (1.0.1)
+    faraday-net_http_persistent (1.1.0)
+    faraday-patron (1.0.0)
     fast_blank (1.0.0)
     fastimage (2.2.4)
     ffi (1.15.0)
@@ -699,7 +711,7 @@ DEPENDENCIES
   capistrano-yarn (~> 2.0)
   capybara (~> 3.35)
   charlock_holmes (~> 0.7.7)
-  chewy (~> 5.2)
+  chewy (~> 7.2)
   cld3 (~> 3.4.2)
   climate_control (~> 0.2)
   color_diff (~> 0.1)
diff --git a/app/chewy/accounts_index.rb b/app/chewy/accounts_index.rb
index 503cde9ee..e5c67c8a4 100644
--- a/app/chewy/accounts_index.rb
+++ b/app/chewy/accounts_index.rb
@@ -1,7 +1,12 @@
 # frozen_string_literal: true
 
 class AccountsIndex < Chewy::Index
-  settings index: { refresh_interval: '5m' }, analysis: {
+  settings index: {
+    refresh_interval: '5m',
+    number_of_shards: 1,
+    number_of_replicas: 0,
+  },
+  analysis: {
     analyzer: {
       content: {
         tokenizer: 'whitespace',
@@ -57,30 +62,30 @@ class AccountsIndex < Chewy::Index
     },
   }
 
-  define_type ::Account.searchable.includes(:account_stat), delete_if: ->(account) { account.destroyed? || !account.searchable? } do
-    root date_detection: false do
-      field :id, type: 'long'
+  index_scope ::Account.searchable.includes(:account_stat), delete_if: ->(account) { account.destroyed? || !account.searchable? }
 
-      field :display_name, type: 'text', analyzer: 'content' do
-        field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
-      end
+  root date_detection: false do
+    field :id, type: 'long'
 
-      field :acct, type: 'text', analyzer: 'content', value: ->(account) { [account.username, account.domain].compact.join('@') } do
-        field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
-      end
-
-      field :actor_type, type: 'keyword', normalizer: 'keyword'
-
-      field :text, type: 'text', value: ->(account) { account.index_text } do
-        field :stemmed, type: 'text', analyzer: 'sudachi_content'
-      end
-
-      field :discoverable, type: 'boolean'
-
-      field :following_count, type: 'long', value: ->(account) { account.following.local.count }
-      field :followers_count, type: 'long', value: ->(account) { account.followers.local.count }
-      field :subscribing_count, type: 'long', value: ->(account) { account.subscribing.local.count }
-      field :last_status_at, type: 'date', value: ->(account) { account.last_status_at || account.created_at }
+    field :display_name, type: 'text', analyzer: 'content' do
+      field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
     end
+
+    field :acct, type: 'text', analyzer: 'content', value: ->(account) { [account.username, account.domain].compact.join('@') } do
+      field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
+    end
+
+    field :actor_type, type: 'keyword', normalizer: 'keyword'
+
+    field :text, type: 'text', value: ->(account) { account.index_text } do
+      field :stemmed, type: 'text', analyzer: 'sudachi_content'
+    end
+
+    field :discoverable, type: 'boolean'
+
+    field :following_count, type: 'long', value: ->(account) { account.following.local.count }
+    field :followers_count, type: 'long', value: ->(account) { account.followers.local.count }
+    field :subscribing_count, type: 'long', value: ->(account) { account.subscribing.local.count }
+    field :last_status_at, type: 'date', value: ->(account) { account.last_status_at || account.created_at }
   end
 end
diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb
index 018ee9c6e..e731eb390 100644
--- a/app/chewy/statuses_index.rb
+++ b/app/chewy/statuses_index.rb
@@ -1,17 +1,18 @@
 # frozen_string_literal: true
 
 class StatusesIndex < Chewy::Index
-  settings index: { refresh_interval: '15m' }, analysis: {
+  settings index: {
+    refresh_interval: '15m',
+    number_of_shards: 1,
+    number_of_replicas: 0,
+  },
+  analysis: {
     tokenizer: {
       sudachi_tokenizer: {
         type: 'sudachi_tokenizer',
         discard_punctuation: true,
         resources_path: '/etc/elasticsearch',
         settings_path: '/etc/elasticsearch/sudachi.json',
-        additional_settings: {
-          systemDict: 'system_full.dic',
-          userDict: [],
-        },
       },
     },
     analyzer: {
@@ -36,41 +37,41 @@ class StatusesIndex < Chewy::Index
     },
   }
 
-  define_type ::Status.unscoped.kept.without_reblogs.includes(:media_attachments, :preloadable_poll) do
-    crutch :mentions do |collection|
-      data = ::Mention.where(status_id: collection.map(&:id)).where(account: Account.local, silent: false).pluck(:status_id, :account_id)
-      data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
+  index_scope ::Status.unscoped.kept.without_reblogs.includes(:media_attachments, :preloadable_poll)
+
+  crutch :mentions do |collection|
+    data = ::Mention.where(status_id: collection.map(&:id)).where(account: Account.local, silent: false).pluck(:status_id, :account_id)
+    data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
+  end
+
+  crutch :favourites do |collection|
+    data = ::Favourite.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
+    data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
+  end
+
+  crutch :reblogs do |collection|
+    data = ::Status.where(reblog_of_id: collection.map(&:id)).where(account: Account.local).pluck(:reblog_of_id, :account_id)
+    data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
+  end
+
+  crutch :bookmarks do |collection|
+    data = ::Bookmark.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
+    data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
+  end
+
+  crutch :emoji_reactions do |collection|
+    data = ::EmojiReaction.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
+    data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
+  end
+
+  root date_detection: false do
+    field :id, type: 'long'
+    field :account_id, type: 'long'
+
+    field :text, type: 'text', value: ->(status) { status.index_text } do
+      field :stemmed, type: 'text', analyzer: 'content'
     end
 
-    crutch :favourites do |collection|
-      data = ::Favourite.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
-      data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
-    end
-
-    crutch :reblogs do |collection|
-      data = ::Status.where(reblog_of_id: collection.map(&:id)).where(account: Account.local).pluck(:reblog_of_id, :account_id)
-      data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
-    end
-
-    crutch :bookmarks do |collection|
-      data = ::Bookmark.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
-      data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
-    end
-
-    crutch :emoji_reactions do |collection|
-      data = ::EmojiReaction.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
-      data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
-    end
-
-    root date_detection: false do
-      field :id, type: 'long'
-      field :account_id, type: 'long'
-
-      field :text, type: 'text', value: ->(status) { status.index_text } do
-        field :stemmed, type: 'text', analyzer: 'content'
-      end
-
-      field :searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) }
-    end
+    field :searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) }
   end
 end
diff --git a/app/chewy/tags_index.rb b/app/chewy/tags_index.rb
index 300fc128f..a5f382b9e 100644
--- a/app/chewy/tags_index.rb
+++ b/app/chewy/tags_index.rb
@@ -1,7 +1,12 @@
 # frozen_string_literal: true
 
 class TagsIndex < Chewy::Index
-  settings index: { refresh_interval: '15m' }, analysis: {
+  settings index: {
+    refresh_interval: '15m',
+    number_of_shards: 1,
+    number_of_replicas: 0,
+  },
+  analysis: {
     analyzer: {
       content: {
         tokenizer: 'keyword',
@@ -23,15 +28,15 @@ class TagsIndex < Chewy::Index
     },
   }
 
-  define_type ::Tag.listable, delete_if: ->(tag) { tag.destroyed? || !tag.listable? } do
-    root date_detection: false do
-      field :name, type: 'text', analyzer: 'content' do
-        field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
-      end
+  index_scope ::Tag.listable, delete_if: ->(tag) { tag.destroyed? || !tag.listable? }
 
-      field :reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? }
-      field :usage, type: 'long', value: ->(tag) { tag.history.reduce(0) { |total, day| total + day[:accounts].to_i } }
-      field :last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at }
+  root date_detection: false do
+    field :name, type: 'text', analyzer: 'content' do
+      field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
     end
+
+    field :reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? }
+    field :usage, type: 'long', value: ->(tag) { tag.history.reduce(0) { |total, day| total + day[:accounts].to_i } }
+    field :last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at }
   end
 end
diff --git a/app/models/account.rb b/app/models/account.rb
index 87aba7eed..70ddfa953 100644
--- a/app/models/account.rb
+++ b/app/models/account.rb
@@ -144,7 +144,7 @@ class Account < ApplicationRecord
 
   delegate :chosen_languages, to: :user, prefix: false, allow_nil: true
 
-  update_index('accounts#account', :self)
+  update_index('accounts', :self)
 
   def local?
     domain.nil?
diff --git a/app/models/account_stat.rb b/app/models/account_stat.rb
index 11e6318fe..2c5423f0e 100644
--- a/app/models/account_stat.rb
+++ b/app/models/account_stat.rb
@@ -20,5 +20,5 @@ class AccountStat < ApplicationRecord
 
   belongs_to :account, inverse_of: :account_stat
 
-  update_index('accounts#account', :account)
+  update_index('accounts', :account)
 end
diff --git a/app/models/bookmark.rb b/app/models/bookmark.rb
index f21ea714c..6334ef0df 100644
--- a/app/models/bookmark.rb
+++ b/app/models/bookmark.rb
@@ -13,7 +13,7 @@
 class Bookmark < ApplicationRecord
   include Paginable
 
-  update_index('statuses#status', :status) if Chewy.enabled?
+  update_index('statuses', :status) if Chewy.enabled?
 
   belongs_to :account, inverse_of: :bookmarks
   belongs_to :status, inverse_of: :bookmarks
diff --git a/app/models/favourite.rb b/app/models/favourite.rb
index ca8bce146..2f355739a 100644
--- a/app/models/favourite.rb
+++ b/app/models/favourite.rb
@@ -13,7 +13,7 @@
 class Favourite < ApplicationRecord
   include Paginable
 
-  update_index('statuses#status', :status)
+  update_index('statuses', :status)
 
   belongs_to :account, inverse_of: :favourites
   belongs_to :status, inverse_of: :favourites
diff --git a/app/models/status.rb b/app/models/status.rb
index 81faca0b4..b06ed5649 100644
--- a/app/models/status.rb
+++ b/app/models/status.rb
@@ -22,10 +22,9 @@
 #  application_id         :bigint(8)
 #  in_reply_to_account_id :bigint(8)
 #  poll_id                :bigint(8)
-#  quote_id               :bigint(8)
 #  deleted_at             :datetime
-#  expires_at             :datetime         default(Infinity), not null
-#  expires_action         :integer          default("delete"), not null
+#  quote_id               :bigint(8)
+#  expired_at             :datetime
 #
 
 class Status < ApplicationRecord
@@ -48,7 +47,7 @@ class Status < ApplicationRecord
 
   attr_accessor :circle
 
-  update_index('statuses#status', :proper)
+  update_index('statuses', :proper)
 
   enum visibility: [:public, :unlisted, :private, :direct, :limited, :mutual], _suffix: :visibility
   enum expires_action: [:delete, :hint], _prefix: :expires
diff --git a/app/models/tag.rb b/app/models/tag.rb
index f083760bb..b28c77010 100644
--- a/app/models/tag.rb
+++ b/app/models/tag.rb
@@ -43,7 +43,7 @@ class Tag < ApplicationRecord
 
   before_save :set_unlistable, if: :force_unlistable?
 
-  update_index('tags#tag', :self)
+  update_index('tags', :self)
 
   def to_param
     name
diff --git a/app/services/batched_remove_status_service.rb b/app/services/batched_remove_status_service.rb
index a1acace2f..328489765 100644
--- a/app/services/batched_remove_status_service.rb
+++ b/app/services/batched_remove_status_service.rb
@@ -35,7 +35,7 @@ class BatchedRemoveStatusService < BaseService
 
     # Since we skipped all callbacks, we also need to manually
     # deindex the statuses
-    Chewy.strategy.current.update(StatusesIndex::Status, statuses_and_reblogs) if Chewy.enabled?
+    Chewy.strategy.current.update(StatusesIndex, statuses_and_reblogs) if Chewy.enabled?
 
     return if options[:skip_side_effects]
 
diff --git a/app/services/delete_account_service.rb b/app/services/delete_account_service.rb
index 0fed52ce7..41a52af78 100644
--- a/app/services/delete_account_service.rb
+++ b/app/services/delete_account_service.rb
@@ -190,7 +190,7 @@ class DeleteAccountService < BaseService
     @account.favourites.in_batches do |favourites|
       ids = favourites.pluck(:status_id)
       StatusStat.where(status_id: ids).update_all('favourites_count = GREATEST(0, favourites_count - 1)')
-      Chewy.strategy.current.update(StatusesIndex::Status, ids) if Chewy.enabled?
+      Chewy.strategy.current.update(StatusesIndex, ids) if Chewy.enabled?
       Rails.cache.delete_multi(ids.map { |id| "statuses/#{id}" })
       favourites.delete_all
     end
@@ -198,14 +198,14 @@ class DeleteAccountService < BaseService
 
   def purge_bookmarks!
     @account.bookmarks.in_batches do |bookmarks|
-      Chewy.strategy.current.update(StatusesIndex::Status, bookmarks.pluck(:status_id)) if Chewy.enabled?
+      Chewy.strategy.current.update(StatusesIndex, bookmarks.pluck(:status_id)) if Chewy.enabled?
       bookmarks.delete_all
     end
   end
 
   def purge_reactions!
     @account.emoji_reactions.in_batches do |reactions|
-      Chewy.strategy.current.update(StatusesIndex::Status, reactions.pluck(:status_id)) if Chewy.enabled?
+      Chewy.strategy.current.update(StatusesIndex, reactions.pluck(:status_id)) if Chewy.enabled?
       reactions.delete_all
     end
   end
diff --git a/config/initializers/chewy.rb b/config/initializers/chewy.rb
index 9fc9b2f1a..e099b45cf 100644
--- a/config/initializers/chewy.rb
+++ b/config/initializers/chewy.rb
@@ -33,23 +33,3 @@ end
 # Mastodon is run with hidden services enabled, because
 # ElasticSearch is *not* supposed to be accessed through a proxy
 Faraday.ignore_env_proxy = true
-
-# Elasticsearch 7.x workaround
-Elasticsearch::Transport::Client.prepend Module.new {
-  def search(arguments = {})
-    arguments[:rest_total_hits_as_int] = true
-    super arguments
-  end
-}
-
-Elasticsearch::API::Indices::IndicesClient.prepend Module.new {
-  def create(arguments = {})
-    arguments[:include_type_name] = true
-    super arguments
-  end
-
-  def put_mapping(arguments = {})
-    arguments[:include_type_name] = true
-    super arguments
-  end
-}
diff --git a/lib/mastodon/search_cli.rb b/lib/mastodon/search_cli.rb
index 0126dfcff..399a1dec3 100644
--- a/lib/mastodon/search_cli.rb
+++ b/lib/mastodon/search_cli.rb
@@ -65,9 +65,7 @@ module Mastodon
 
       # Estimate the amount of data that has to be imported first
       indices.each do |index|
-        index.types.each do |type|
-          progress.total = (progress.total || 0) + type.adapter.default_scope.count
-        end
+        progress.total = (progress.total || 0) + index.adapter.default_scope.count
       end
 
       # Now import all the actual data. Mind that unlike chewy:sync, we don't
@@ -80,67 +78,66 @@ module Mastodon
         batch_size     = 1_000
         slice_size     = (batch_size / options[:concurrency]).ceil
 
-        index.types.each do |type|
-          type.adapter.default_scope.reorder(nil).find_in_batches(batch_size: batch_size) do |batch|
-            futures = []
+        index.adapter.default_scope.reorder(nil).find_in_batches(batch_size: batch_size) do |batch|
+          futures = []
 
-            batch.each_slice(slice_size) do |records|
-              futures << Concurrent::Future.execute(executor: pool) do
-                begin
-                  if !progress.total.nil? && progress.progress + records.size > progress.total
-                    # The number of items has changed between start and now,
-                    # since there is no good way to predict the final count from
-                    # here, just change the progress bar to an indeterminate one
+          batch.each_slice(slice_size) do |records|
+            futures << Concurrent::Future.execute(executor: pool) do
+              begin
+                if !progress.total.nil? && progress.progress + records.size > progress.total
+                  # The number of items has changed between start and now,
+                  # since there is no good way to predict the final count from
+                  # here, just change the progress bar to an indeterminate one
 
-                    progress.total = nil
-                  end
+                  progress.total = nil
+                end
 
-                  grouped_records = nil
-                  bulk_body       = nil
-                  index_count     = 0
-                  delete_count    = 0
+                grouped_records = nil
+                bulk_body       = nil
+                index_count     = 0
+                delete_count    = 0
 
-                  ActiveRecord::Base.connection_pool.with_connection do
-                    grouped_records = type.adapter.send(:grouped_objects, records)
-                    bulk_body       = Chewy::Type::Import::BulkBuilder.new(type, **grouped_records).bulk_body
-                  end
+                ActiveRecord::Base.connection_pool.with_connection do
+                  grouped_records = index.adapter.send(:grouped_objects, records)
+                  grouped_records = { to_index: grouped_records[:to_index] || grouped_records[:index] || [], delete: grouped_records[:delete] || [] }
+                  bulk_body       = Chewy::Index::Import::BulkBuilder.new(index, **grouped_records).bulk_body
+                end
 
-                  index_count  = grouped_records[:index].size  if grouped_records.key?(:index)
-                  delete_count = grouped_records[:delete].size if grouped_records.key?(:delete)
+                index_count  = grouped_records[:to_index].size if grouped_records.key?(:to_index)
+                delete_count = grouped_records[:delete].size   if grouped_records.key?(:delete)
 
-                  # The following is an optimization for statuses specifically, since
-                  # we want to de-index statuses that cannot be searched by anybody,
-                  # but can't use Chewy's delete_if logic because it doesn't use
-                  # crutches and our searchable_by logic depends on them
-                  if type == StatusesIndex::Status
-                    bulk_body.map! do |entry|
-                      if entry[:index] && entry.dig(:index, :data, 'searchable_by').blank?
-                        index_count  -= 1
-                        delete_count += 1
+                # The following is an optimization for statuses specifically, since
+                # we want to de-index statuses that cannot be searched by anybody,
+                # but can't use Chewy's delete_if logic because it doesn't use
+                # crutches and our searchable_by logic depends on them
+                if index == StatusesIndex
+                  bulk_body.map! do |entry|
+                    if entry[:index] && entry.dig(:index, :data, 'searchable_by').blank?
+                      index_count  -= 1
+                      delete_count += 1
 
-                        { delete: entry[:index].except(:data) }
-                      else
-                        entry
-                      end
+                      { delete: entry[:index].except(:data) }
+                    else
+                      entry
                     end
                   end
-
-                  Chewy::Type::Import::BulkRequest.new(type).perform(bulk_body)
-
-                  progress.progress += records.size
-
-                  added.increment(index_count)
-                  removed.increment(delete_count)
-
-                  sleep 1
-                rescue => e
-                  progress.log pastel.red("Error importing #{index}: #{e}")
                 end
+
+                Chewy::Index::Import::BulkRequest.new(index).perform(bulk_body)
+
+                progress.progress += records.size
+
+                added.increment(index_count)
+                removed.increment(delete_count)
+
+                sleep 1
+              rescue => e
+                progress.log pastel.red("Error importing #{index}: #{e}")
               end
             end
-
-            futures.map(&:value)
           end
+
+          futures.map(&:value)
         end
       end
 