Full-text search for authorized statuses (#6423)
* Add full-text search for authorized statuses - Search API will return statuses that match the query - Only for logged in users - Only if you are author of the status, - Or you were mentioned in it - Or you favourited or reblogged it - Configuration over `ES_ENABLED`, `ES_HOST`, `ES_PORT`, `ES_PREFIX` - Run `rails chewy:deploy` to create & populate index Fix #5880 Fix #4293 Fix #1152 * Add commented out docker-compose configuration for ES container * Optimize index import, filter search results * Add basic normalization to the index * Add better stemming and normalization to the index * Skip webfinger request if search query includes both @ and a space * Fix code style * Visually separate search result sections * Fix code style issues
This commit is contained in:
		
							parent
							
								
									235c14c79d
								
							
						
					
					
						commit
						3ebc0ad4d3
					
				
					 13 changed files with 230 additions and 5 deletions
				
			
		| 
						 | 
				
			
			@ -9,6 +9,10 @@ DB_USER=postgres
 | 
			
		|||
DB_NAME=postgres
 | 
			
		||||
DB_PASS=
 | 
			
		||||
DB_PORT=5432
 | 
			
		||||
# Optional ElasticSearch configuration
 | 
			
		||||
# ES_ENABLED=true
 | 
			
		||||
# ES_HOST=localhost
 | 
			
		||||
# ES_PORT=9200
 | 
			
		||||
 | 
			
		||||
# Federation
 | 
			
		||||
# Note: Changing LOCAL_DOMAIN at a later time will cause unwanted side effects, including breaking all existing federation.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										1
									
								
								Gemfile
									
										
									
									
									
								
							
							
						
						
									
										1
									
								
								Gemfile
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -27,6 +27,7 @@ gem 'bootsnap'
 | 
			
		|||
gem 'browser'
 | 
			
		||||
gem 'charlock_holmes', '~> 0.7.5'
 | 
			
		||||
gem 'iso-639'
 | 
			
		||||
gem 'chewy', '~> 0.10', git: 'https://github.com/toptal/chewy.git'
 | 
			
		||||
gem 'cld3', '~> 3.2.0'
 | 
			
		||||
gem 'devise', '~> 4.4'
 | 
			
		||||
gem 'devise-two-factor', '~> 3.0'
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										22
									
								
								Gemfile.lock
									
										
									
									
									
								
							
							
						
						
									
										22
									
								
								Gemfile.lock
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -1,3 +1,12 @@
 | 
			
		|||
GIT
 | 
			
		||||
  remote: https://github.com/toptal/chewy.git
 | 
			
		||||
  revision: a7d21eb4b0bd7415533ef134bb6d31b2df309701
 | 
			
		||||
  specs:
 | 
			
		||||
    chewy (0.10.1)
 | 
			
		||||
      activesupport (>= 4.0)
 | 
			
		||||
      elasticsearch (>= 2.0.0)
 | 
			
		||||
      elasticsearch-dsl
 | 
			
		||||
 | 
			
		||||
GEM
 | 
			
		||||
  remote: https://rubygems.org/
 | 
			
		||||
  specs:
 | 
			
		||||
| 
						 | 
				
			
			@ -154,6 +163,15 @@ GEM
 | 
			
		|||
      json
 | 
			
		||||
      thread
 | 
			
		||||
      thread_safe
 | 
			
		||||
    elasticsearch (6.0.1)
 | 
			
		||||
      elasticsearch-api (= 6.0.1)
 | 
			
		||||
      elasticsearch-transport (= 6.0.1)
 | 
			
		||||
    elasticsearch-api (6.0.1)
 | 
			
		||||
      multi_json
 | 
			
		||||
    elasticsearch-dsl (0.1.5)
 | 
			
		||||
    elasticsearch-transport (6.0.1)
 | 
			
		||||
      faraday
 | 
			
		||||
      multi_json
 | 
			
		||||
    encryptor (3.0.0)
 | 
			
		||||
    erubi (1.7.0)
 | 
			
		||||
    et-orbi (1.0.8)
 | 
			
		||||
| 
						 | 
				
			
			@ -163,6 +181,8 @@ GEM
 | 
			
		|||
    fabrication (2.18.0)
 | 
			
		||||
    faker (1.8.4)
 | 
			
		||||
      i18n (~> 0.5)
 | 
			
		||||
    faraday (0.14.0)
 | 
			
		||||
      multipart-post (>= 1.2, < 3)
 | 
			
		||||
    fast_blank (1.0.0)
 | 
			
		||||
    ffi (1.9.18)
 | 
			
		||||
    fog-core (1.45.0)
 | 
			
		||||
| 
						 | 
				
			
			@ -291,6 +311,7 @@ GEM
 | 
			
		|||
    minitest (5.11.3)
 | 
			
		||||
    msgpack (1.1.0)
 | 
			
		||||
    multi_json (1.12.2)
 | 
			
		||||
    multipart-post (2.0.0)
 | 
			
		||||
    net-scp (1.2.1)
 | 
			
		||||
      net-ssh (>= 2.6.5)
 | 
			
		||||
    net-ssh (4.2.0)
 | 
			
		||||
| 
						 | 
				
			
			@ -583,6 +604,7 @@ DEPENDENCIES
 | 
			
		|||
  capistrano-yarn (~> 2.0)
 | 
			
		||||
  capybara (~> 2.15)
 | 
			
		||||
  charlock_holmes (~> 0.7.5)
 | 
			
		||||
  chewy (~> 0.10)!
 | 
			
		||||
  cld3 (~> 3.2.0)
 | 
			
		||||
  climate_control (~> 0.2)
 | 
			
		||||
  devise (~> 4.4)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										61
									
								
								app/chewy/statuses_index.rb
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										61
									
								
								app/chewy/statuses_index.rb
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,61 @@
 | 
			
		|||
# frozen_string_literal: true
 | 
			
		||||
 | 
			
		||||
# Chewy index definition for statuses.
#
# Declares a custom `content` analyzer (URL/e-mail aware tokenizer followed by
# English possessive stemming, lowercasing, ASCII folding, CJK width
# normalization, English stop-word removal and English stemming) and indexes
# every non-reblog status with its author, text, the accounts that may find it,
# and its creation time.
class StatusesIndex < Chewy::Index
  settings index: { refresh_interval: '15m' }, analysis: {
    filter: {
      english_stop: {
        type: 'stop',
        stopwords: '_english_',
      },
      english_stemmer: {
        type: 'stemmer',
        language: 'english',
      },
      english_possessive_stemmer: {
        type: 'stemmer',
        language: 'possessive_english',
      },
    },
    analyzer: {
      content: {
        tokenizer: 'uax_url_email',
        filter: %w(
          english_possessive_stemmer
          lowercase
          asciifolding
          cjk_width
          english_stop
          english_stemmer
        ),
      },
    },
  }

  define_type ::Status.without_reblogs do
    # status id => ids of the accounts mentioned in that status
    crutch :mentions do |collection|
      data = ::Mention.where(status_id: collection.map(&:id)).pluck(:status_id, :account_id)
      data.each_with_object({}) { |(status_id, account_id), result| (result[status_id] ||= []).push(account_id) }
    end

    # status id => ids of the accounts that favourited that status
    crutch :favourites do |collection|
      data = ::Favourite.where(status_id: collection.map(&:id)).pluck(:status_id, :account_id)
      data.each_with_object({}) { |(status_id, account_id), result| (result[status_id] ||= []).push(account_id) }
    end

    # status id => ids of the accounts that reblogged that status
    crutch :reblogs do |collection|
      data = ::Status.where(reblog_of_id: collection.map(&:id)).pluck(:reblog_of_id, :account_id)
      data.each_with_object({}) { |(status_id, account_id), result| (result[status_id] ||= []).push(account_id) }
    end

    root date_detection: false do
      field :account_id, type: 'long'

      # Spoiler text and plain-text body are indexed together; `text.stemmed`
      # is the same value run through the `content` analyzer declared above.
      field :text, type: 'text', value: ->(status) { [status.spoiler_text, Formatter.instance.plaintext(status)].join("\n\n") } do
        field :stemmed, type: 'text', analyzer: 'content'
      end

      # Account ids allowed to find this status, computed from the crutches above
      field :searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) }
      field :created_at, type: 'date'
    end
  end
end
 | 
			
		||||
| 
						 | 
				
			
			@ -22,6 +22,8 @@ export default class SearchResults extends ImmutablePureComponent {
 | 
			
		|||
      count   += results.get('accounts').size;
 | 
			
		||||
      accounts = (
 | 
			
		||||
        <div className='search-results__section'>
 | 
			
		||||
          <h5><FormattedMessage id='search_results.accounts' defaultMessage='People' /></h5>
 | 
			
		||||
 | 
			
		||||
          {results.get('accounts').map(accountId => <AccountContainer key={accountId} id={accountId} />)}
 | 
			
		||||
        </div>
 | 
			
		||||
      );
 | 
			
		||||
| 
						 | 
				
			
			@ -31,6 +33,8 @@ export default class SearchResults extends ImmutablePureComponent {
 | 
			
		|||
      count   += results.get('statuses').size;
 | 
			
		||||
      statuses = (
 | 
			
		||||
        <div className='search-results__section'>
 | 
			
		||||
          <h5><FormattedMessage id='search_results.statuses' defaultMessage='Toots' /></h5>
 | 
			
		||||
 | 
			
		||||
          {results.get('statuses').map(statusId => <StatusContainer key={statusId} id={statusId} />)}
 | 
			
		||||
        </div>
 | 
			
		||||
      );
 | 
			
		||||
| 
						 | 
				
			
			@ -40,6 +44,8 @@ export default class SearchResults extends ImmutablePureComponent {
 | 
			
		|||
      count += results.get('hashtags').size;
 | 
			
		||||
      hashtags = (
 | 
			
		||||
        <div className='search-results__section'>
 | 
			
		||||
          <h5><FormattedMessage id='search_results.hashtags' defaultMessage='Hashtags' /></h5>
 | 
			
		||||
 | 
			
		||||
          {results.get('hashtags').map(hashtag => (
 | 
			
		||||
            <Link key={hashtag} className='search-results__hashtag' to={`/timelines/tag/${hashtag}`}>
 | 
			
		||||
              #{hashtag}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1786,7 +1786,7 @@
 | 
			
		|||
  flex: 1;
 | 
			
		||||
  min-height: 47px;
 | 
			
		||||
 | 
			
		||||
  > img { 
 | 
			
		||||
  > img {
 | 
			
		||||
    display: block;
 | 
			
		||||
    object-fit: contain;
 | 
			
		||||
    object-position: bottom left;
 | 
			
		||||
| 
						 | 
				
			
			@ -3229,6 +3229,43 @@
 | 
			
		|||
  font-weight: 500;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// One section (people / toots / hashtags) of the search results column.
.search-results__section {
  margin-bottom: 20px;

  // Section heading: a horizontal rule drawn through the vertical middle,
  // with the label (span) painted on top of it.
  h5 {
    position: relative;

    &::before {
      content: "";
      display: block;
      position: absolute;
      left: 0;
      right: 0;
      top: 50%;
      width: 100%;
      height: 0;
      border-top: 1px solid lighten($ui-base-color, 8%);
    }

    span {
      display: inline-block;
      background: $ui-base-color;
      color: $ui-primary-color;
      font-size: 14px;
      font-weight: 500;
      padding: 10px;
      position: relative;
      z-index: 1;
      cursor: default;
    }
  }

  // No bottom border on the last entry of a section
  .account:last-child,
  & > div:last-child .status {
    border-bottom: 0;
  }
}
 | 
			
		||||
 | 
			
		||||
.search-results__hashtag {
 | 
			
		||||
  display: block;
 | 
			
		||||
  padding: 10px;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -9,6 +9,7 @@ class StatusFilter
 | 
			
		|||
  end
 | 
			
		||||
 | 
			
		||||
  # Whether the status should be hidden from the account.
  #
  # Returns false outright when the account is the author of the status;
  # otherwise the result of the policy / per-account filter / silencing checks.
  def filtered?
    # The author's own status is never filtered for the author
    return false if !account.nil? && account.id == status.account_id

    blocked_by_policy? || (account_present? && filtered_status?) || silenced_account?
  end
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -13,6 +13,8 @@
 | 
			
		|||
class Favourite < ApplicationRecord
 | 
			
		||||
  include Paginable
 | 
			
		||||
 | 
			
		||||
  update_index('statuses#status', :status) if Chewy.enabled?
 | 
			
		||||
 | 
			
		||||
  belongs_to :account, inverse_of: :favourites
 | 
			
		||||
  belongs_to :status,  inverse_of: :favourites, counter_cache: true
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -31,6 +31,8 @@ class Status < ApplicationRecord
 | 
			
		|||
  include Cacheable
 | 
			
		||||
  include StatusThreadingConcern
 | 
			
		||||
 | 
			
		||||
  update_index('statuses#status', :proper) if Chewy.enabled?
 | 
			
		||||
 | 
			
		||||
  enum visibility: [:public, :unlisted, :private, :direct], _suffix: :visibility
 | 
			
		||||
 | 
			
		||||
  belongs_to :application, class_name: 'Doorkeeper::Application', optional: true
 | 
			
		||||
| 
						 | 
				
			
			@ -78,6 +80,22 @@ class Status < ApplicationRecord
 | 
			
		|||
 | 
			
		||||
  delegate :domain, to: :account, prefix: true
 | 
			
		||||
 | 
			
		||||
  # Ids of the accounts that are allowed to find this status through search:
  # the author plus everyone who was mentioned in it, favourited it or
  # reblogged it.
  #
  # preloaded - optional object responding to #mentions, #favourites and
  #             #reblogs, each returning a hash of status id => account ids.
  #             When given, no queries are issued from here; when nil the
  #             associations are plucked directly.
  #
  # Returns an array of unique account ids, author first.
  def searchable_by(preloaded = nil)
    ids = [account_id]

    if preloaded.nil?
      ids += mentions.pluck(:account_id)
      ids += favourites.pluck(:account_id)
      ids += reblogs.pluck(:account_id)
    else
      # A status absent from a preloaded hash simply has no entries of that kind
      ids += preloaded.mentions[id] || []
      ids += preloaded.favourites[id] || []
      ids += preloaded.reblogs[id] || []
    end

    ids.uniq
  end
 | 
			
		||||
 | 
			
		||||
  # Truthy when the status has a parent (in_reply_to_id is set) or when its
  # raw 'reply' attribute is truthy. Note the second operand is returned
  # as-is, so the result may be nil rather than false.
  def reply?
    !in_reply_to_id.nil? || attributes['reply']
  end
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,21 +1,43 @@
 | 
			
		|||
# frozen_string_literal: true
 | 
			
		||||
 | 
			
		||||
class SearchService < BaseService
 | 
			
		||||
  attr_accessor :query
 | 
			
		||||
  attr_accessor :query, :account, :limit, :resolve
 | 
			
		||||
 | 
			
		||||
  # Runs a search and returns a hash with :accounts, :hashtags and :statuses.
  #
  # query   - the search string
  # limit   - maximum number of results requested from each search
  # resolve - passed through to the account search (default false)
  # account - the account performing the search, or nil (default nil)
  #
  # A URL query is resolved to a single resource; any other non-blank query is
  # dispatched to the account, status and hashtag searches, each guarded by its
  # own *_searchable? predicate.
  def call(query, limit, resolve = false, account = nil)
    @query   = query
    @account = account
    @limit   = limit
    @resolve = resolve

    default_results.tap do |results|
      if url_query?
        results.merge!(url_resource_results) unless url_resource.nil?
      elsif query.present?
        # Each search runs at most once, and only when its predicate allows it
        results[:accounts] = perform_accounts_search! if account_searchable?
        results[:statuses] = perform_statuses_search! if full_text_searchable?
        results[:hashtags] = perform_hashtags_search! if hashtag_searchable?
      end
    end
  end
 | 
			
		||||
 | 
			
		||||
  private
 | 
			
		||||
 | 
			
		||||
  # Delegates the account search to AccountSearchService with the current
  # query, limit, searching account and resolve flag.
  def perform_accounts_search!
    AccountSearchService.new.call(query, limit, account, resolve: resolve)
  end
 | 
			
		||||
 | 
			
		||||
  # Full-text search over StatusesIndex.
  #
  # Only statuses whose `searchable_by` contains the searching account's id are
  # considered; every query term must match (operator 'and') across the `text`
  # and `text.stemmed` fields. Statuses that StatusFilter reports as filtered
  # for the account are dropped afterwards, so fewer than `limit` results may
  # be returned.
  def perform_statuses_search!
    statuses = StatusesIndex.filter(term: { searchable_by: account.id })
                            .query(multi_match: { type: 'most_fields', query: query, operator: 'and', fields: %w(text text.stemmed) })
                            .limit(limit).objects

    statuses.reject { |status| StatusFilter.new(status, account).filtered? }
  end
 | 
			
		||||
 | 
			
		||||
  # Searches tags for the query, ignoring a single leading '#'.
  def perform_hashtags_search!
    # The pattern is anchored at the start, so at most one match is possible
    Tag.search_for(query.sub(/\A#/, ''), limit)
  end
 | 
			
		||||
 | 
			
		||||
  # A fresh, empty result set: one empty array per result category.
  def default_results
    { accounts: [], hashtags: [], statuses: [] }
  end
 | 
			
		||||
| 
						 | 
				
			
			@ -35,4 +57,17 @@ class SearchService < BaseService
 | 
			
		|||
  # Result-hash key for the resolved URL resource: its class name,
  # lower-cased, pluralized and symbolized.
  def url_resource_symbol
    url_resource.class.name.downcase.pluralize.to_sym
  end
 | 
			
		||||
 | 
			
		||||
  # Whether the status full-text search should run for this query.
  #
  # Requires Chewy to be enabled and a searching account to be present.
  # A query without spaces that starts with '#' or contains '@' is not
  # full-text searched.
  def full_text_searchable?
    return false unless Chewy.enabled?
    return false if account.nil?

    tag_or_handle = query.start_with?('#') || query.include?('@')
    single_token  = !query.include?(' ')

    !(tag_or_handle && single_token)
  end
 | 
			
		||||
 | 
			
		||||
  # Whether the account search should run: skipped only when the query
  # contains both an '@' and a space.
  def account_searchable?
    !(query.include?('@') && query.include?(' '))
  end
 | 
			
		||||
 | 
			
		||||
  # Whether the hashtag search should run: skipped when the query contains '@'.
  def hashtag_searchable?
    !query.include?('@')
  end
 | 
			
		||||
end
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										22
									
								
								config/initializers/chewy.rb
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								config/initializers/chewy.rb
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,22 @@
 | 
			
		|||
# Elasticsearch / Chewy configuration, read from the environment.
enabled         = ENV['ES_ENABLED'] == 'true' # only the exact string "true" enables it
host            = ENV.fetch('ES_HOST') { 'localhost' }
port            = ENV.fetch('ES_PORT') { 9200 }
# ES_PREFIX falls back to REDIS_NAMESPACE, and to nil when neither is set
fallback_prefix = ENV.fetch('REDIS_NAMESPACE') { nil }
prefix          = ENV.fetch('ES_PREFIX') { fallback_prefix }

Chewy.settings = {
  host: "#{host}:#{port}",
  prefix: prefix,
  enabled: enabled,
  journal: false,
}

# Index updates go through the :sidekiq strategy when enabled, and are bypassed otherwise
Chewy.root_strategy = enabled ? :sidekiq : :bypass
 | 
			
		||||
 | 
			
		||||
# Reopens Chewy to expose the custom :enabled setting as a predicate.
module Chewy
  # Returns the :enabled entry of Chewy.settings.
  def self.enabled?
    settings[:enabled]
  end
end
 | 
			
		||||
| 
						 | 
				
			
			@ -19,6 +19,17 @@ services:
 | 
			
		|||
#    volumes:
 | 
			
		||||
#      - ./redis:/data
 | 
			
		||||
 | 
			
		||||
#  es:
 | 
			
		||||
#    restart: always
 | 
			
		||||
#    image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.1.3
 | 
			
		||||
#    environment:
 | 
			
		||||
#      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
 | 
			
		||||
#    networks:
 | 
			
		||||
#      - internal_network
 | 
			
		||||
#### Uncomment to enable ES persistence
 | 
			
		||||
##    volumes:
 | 
			
		||||
##      - ./elasticsearch:/usr/share/elasticsearch/data
 | 
			
		||||
 | 
			
		||||
  web:
 | 
			
		||||
    build: .
 | 
			
		||||
    image: gargron/mastodon
 | 
			
		||||
| 
						 | 
				
			
			@ -33,6 +44,7 @@ services:
 | 
			
		|||
    depends_on:
 | 
			
		||||
      - db
 | 
			
		||||
      - redis
 | 
			
		||||
#      - es
 | 
			
		||||
    volumes:
 | 
			
		||||
      - ./public/assets:/mastodon/public/assets
 | 
			
		||||
      - ./public/packs:/mastodon/public/packs
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -25,6 +25,10 @@ RSpec.configure do |config|
 | 
			
		|||
    end
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  config.before :suite do
 | 
			
		||||
    Chewy.strategy(:bypass)
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  config.after :suite do
 | 
			
		||||
    gc_counter = 0
 | 
			
		||||
    FileUtils.rm_rf(Dir["#{Rails.root}/spec/test_files/"])
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		
		Reference in a new issue