forked from cybrespace/mastodon
Add more accurate account search (#11537)
* Add more accurate account search When ElasticSearch is available, a more accurate search is implemented: - Using edge n-gram index for acct and display name - Using asciifolding and cjk width normalization on display names - Using Gaussian decay on account activity for additional scoring (recency) - Using followers/friends ratio for additional scoring (spamminess) - Using followers number for additional scoring (size) The exact match precedence only takes effect when the input conforms to the username format and the username part of it is complete, i.e. when the user started typing the domain part. * Support single-letter usernames * Fix tests * Fix not picking up account updates * Add weights and normalization for scores, skip zero terms queries * Use local counts for accounts index, adjust search parameters * Fix mistakes * Using updated_at of accounts is inadequate for remote accounts
This commit is contained in:
parent
2ca6b2bb6c
commit
8fdff2748f
|
@ -0,0 +1,36 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
class AccountsIndex < Chewy::Index
|
||||
settings index: { refresh_interval: '5m' }, analysis: {
|
||||
analyzer: {
|
||||
content: {
|
||||
tokenizer: 'whitespace',
|
||||
filter: %w(lowercase asciifolding cjk_width),
|
||||
},
|
||||
|
||||
edge_ngram: {
|
||||
tokenizer: 'edge_ngram',
|
||||
filter: %w(lowercase asciifolding cjk_width),
|
||||
},
|
||||
},
|
||||
|
||||
tokenizer: {
|
||||
edge_ngram: {
|
||||
type: 'edge_ngram',
|
||||
min_gram: 1,
|
||||
max_gram: 15,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
define_type ::Account.searchable.includes(:account_stat), delete_if: ->(account) { account.destroyed? || !account.searchable? } do
|
||||
root date_detection: false do
|
||||
field :id, type: 'long'
|
||||
field :display_name, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
|
||||
field :acct, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content', value: ->(account) { [account.username, account.domain].compact.join('@') }
|
||||
field :following_count, type: 'long', value: ->(account) { account.active_relationships.count }
|
||||
field :followers_count, type: 'long', value: ->(account) { account.passive_relationships.count }
|
||||
field :last_status_at, type: 'date', value: ->(account) { account.last_status_at || account.created_at }
|
||||
end
|
||||
end
|
||||
end
|
|
@ -127,6 +127,8 @@ class Account < ApplicationRecord
|
|||
|
||||
delegate :chosen_languages, to: :user, prefix: false, allow_nil: true
|
||||
|
||||
update_index('accounts#account', :self) if Chewy.enabled?
|
||||
|
||||
def local?
|
||||
domain.nil?
|
||||
end
|
||||
|
@ -169,6 +171,10 @@ class Account < ApplicationRecord
|
|||
subscription_expires_at.present?
|
||||
end
|
||||
|
||||
def searchable?
|
||||
!(suspended? || moved?)
|
||||
end
|
||||
|
||||
def possibly_stale?
|
||||
last_webfingered_at.nil? || last_webfingered_at <= 1.day.ago
|
||||
end
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
class AccountStat < ApplicationRecord
|
||||
belongs_to :account, inverse_of: :account_stat
|
||||
|
||||
update_index('accounts#account', :account) if Chewy.enabled?
|
||||
|
||||
def increment_count!(key)
|
||||
update(attributes_for_increment(key))
|
||||
end
|
||||
|
|
|
@ -4,47 +4,150 @@ class AccountSearchService < BaseService
|
|||
attr_reader :query, :limit, :offset, :options, :account
|
||||
|
||||
def call(query, account = nil, options = {})
|
||||
@query = query.strip
|
||||
@limit = options[:limit].to_i
|
||||
@offset = options[:offset].to_i
|
||||
@options = options
|
||||
@account = account
|
||||
@acct_hint = query.start_with?('@')
|
||||
@query = query.strip.gsub(/\A@/, '')
|
||||
@limit = options[:limit].to_i
|
||||
@offset = options[:offset].to_i
|
||||
@options = options
|
||||
@account = account
|
||||
|
||||
search_service_results
|
||||
search_service_results.compact.uniq
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def search_service_results
|
||||
return [] if query_blank_or_hashtag? || limit < 1
|
||||
return [] if query.blank? || limit < 1
|
||||
|
||||
if resolving_non_matching_remote_account?
|
||||
[ResolveAccountService.new.call("#{query_username}@#{query_domain}")].compact
|
||||
else
|
||||
search_results_and_exact_match.compact.uniq
|
||||
[exact_match] + search_results
|
||||
end
|
||||
|
||||
def exact_match
|
||||
return unless offset.zero? && username_complete?
|
||||
|
||||
return @exact_match if defined?(@exact_match)
|
||||
|
||||
@exact_match = begin
|
||||
if options[:resolve]
|
||||
ResolveAccountService.new.call(query)
|
||||
elsif domain_is_local?
|
||||
Account.find_local(query_username)
|
||||
else
|
||||
Account.find_remote(query_username, query_domain)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def resolving_non_matching_remote_account?
|
||||
offset.zero? && options[:resolve] && !exact_match? && !domain_is_local?
|
||||
def search_results
|
||||
return [] if limit_for_non_exact_results.zero?
|
||||
|
||||
@search_results ||= begin
|
||||
if Chewy.enabled?
|
||||
from_elasticsearch
|
||||
else
|
||||
from_database
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def search_results_and_exact_match
|
||||
return search_results.to_a unless offset.zero?
|
||||
|
||||
results = [exact_match]
|
||||
|
||||
return results if exact_match? && limit == 1
|
||||
|
||||
results + search_results.to_a
|
||||
def from_database
|
||||
if account
|
||||
advanced_search_results
|
||||
else
|
||||
simple_search_results
|
||||
end
|
||||
end
|
||||
|
||||
def query_blank_or_hashtag?
|
||||
query.blank? || query.start_with?('#')
|
||||
def advanced_search_results
|
||||
Account.advanced_search_for(terms_for_query, account, limit_for_non_exact_results, options[:following], offset)
|
||||
end
|
||||
|
||||
def simple_search_results
|
||||
Account.search_for(terms_for_query, limit_for_non_exact_results, offset)
|
||||
end
|
||||
|
||||
def from_elasticsearch
|
||||
must_clauses = [{ multi_match: { query: terms_for_query, fields: likely_acct? ? %w(acct) : %w(acct^2 display_name), type: 'best_fields' } }]
|
||||
should_clauses = []
|
||||
|
||||
if account
|
||||
return [] if options[:following] && following_ids.empty?
|
||||
|
||||
if options[:following]
|
||||
must_clauses << { terms: { id: following_ids } }
|
||||
elsif following_ids.any?
|
||||
should_clauses << { terms: { id: following_ids, boost: 100 } }
|
||||
end
|
||||
end
|
||||
|
||||
query = { bool: { must: must_clauses, should: should_clauses } }
|
||||
functions = [reputation_score_function, followers_score_function, time_distance_function]
|
||||
|
||||
records = AccountsIndex.query(function_score: { query: query, functions: functions, boost_mode: 'multiply', score_mode: 'avg' })
|
||||
.limit(limit_for_non_exact_results)
|
||||
.offset(offset)
|
||||
.objects
|
||||
.compact
|
||||
|
||||
ActiveRecord::Associations::Preloader.new.preload(records, :account_stat)
|
||||
|
||||
records
|
||||
end
|
||||
|
||||
def reputation_score_function
|
||||
{
|
||||
script_score: {
|
||||
script: {
|
||||
source: "(doc['followers_count'].value + 0.0) / (doc['followers_count'].value + doc['following_count'].value + 1)",
|
||||
},
|
||||
},
|
||||
}
|
||||
end
|
||||
|
||||
def followers_score_function
|
||||
{
|
||||
field_value_factor: {
|
||||
field: 'followers_count',
|
||||
modifier: 'log2p',
|
||||
missing: 1,
|
||||
},
|
||||
}
|
||||
end
|
||||
|
||||
def time_distance_function
|
||||
{
|
||||
gauss: {
|
||||
last_status_at: {
|
||||
scale: '30d',
|
||||
offset: '30d',
|
||||
decay: 0.3,
|
||||
},
|
||||
},
|
||||
}
|
||||
end
|
||||
|
||||
def following_ids
|
||||
@following_ids ||= account.active_relationships.pluck(:target_account_id)
|
||||
end
|
||||
|
||||
def limit_for_non_exact_results
|
||||
if exact_match?
|
||||
limit - 1
|
||||
else
|
||||
limit
|
||||
end
|
||||
end
|
||||
|
||||
def terms_for_query
|
||||
if domain_is_local?
|
||||
query_username
|
||||
else
|
||||
query
|
||||
end
|
||||
end
|
||||
|
||||
def split_query_string
|
||||
@split_query_string ||= query.gsub(/\A@/, '').split('@')
|
||||
@split_query_string ||= query.split('@')
|
||||
end
|
||||
|
||||
def query_username
|
||||
|
@ -63,57 +166,15 @@ class AccountSearchService < BaseService
|
|||
@domain_is_local ||= TagManager.instance.local_domain?(query_domain)
|
||||
end
|
||||
|
||||
def search_from
|
||||
options[:following] && account ? account.following : Account
|
||||
end
|
||||
|
||||
def exact_match?
|
||||
exact_match.present?
|
||||
end
|
||||
|
||||
def exact_match
|
||||
return @exact_match if defined?(@exact_match)
|
||||
|
||||
@exact_match = begin
|
||||
if domain_is_local?
|
||||
search_from.without_suspended.find_local(query_username)
|
||||
else
|
||||
search_from.without_suspended.find_remote(query_username, query_domain)
|
||||
end
|
||||
end
|
||||
def username_complete?
|
||||
query.include?('@') && "@#{query}" =~ Account::MENTION_RE
|
||||
end
|
||||
|
||||
def search_results
|
||||
@search_results ||= begin
|
||||
if account
|
||||
advanced_search_results
|
||||
else
|
||||
simple_search_results
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def advanced_search_results
|
||||
Account.advanced_search_for(terms_for_query, account, limit_for_non_exact_results, options[:following], offset)
|
||||
end
|
||||
|
||||
def simple_search_results
|
||||
Account.search_for(terms_for_query, limit_for_non_exact_results, offset)
|
||||
end
|
||||
|
||||
def limit_for_non_exact_results
|
||||
if offset.zero? && exact_match?
|
||||
limit - 1
|
||||
else
|
||||
limit
|
||||
end
|
||||
end
|
||||
|
||||
def terms_for_query
|
||||
if domain_is_local?
|
||||
query_username
|
||||
else
|
||||
"#{query_username} #{query_domain}"
|
||||
end
|
||||
def likely_acct?
|
||||
@acct_hint || username_complete?
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,126 +1,56 @@
|
|||
require 'rails_helper'
|
||||
|
||||
describe AccountSearchService, type: :service do
|
||||
describe '.call' do
|
||||
describe 'with a query to ignore' do
|
||||
describe '#call' do
|
||||
context 'with a query to ignore' do
|
||||
it 'returns empty array for missing query' do
|
||||
results = subject.call('', nil, limit: 10)
|
||||
|
||||
expect(results).to eq []
|
||||
end
|
||||
it 'returns empty array for hashtag query' do
|
||||
results = subject.call('#tag', nil, limit: 10)
|
||||
|
||||
expect(results).to eq []
|
||||
end
|
||||
it 'returns empty array for limit zero' do
|
||||
Fabricate(:account, username: 'match')
|
||||
|
||||
results = subject.call('match', nil, limit: 0)
|
||||
|
||||
expect(results).to eq []
|
||||
end
|
||||
end
|
||||
|
||||
describe 'searching for a simple term that is not an exact match' do
|
||||
context 'searching for a simple term that is not an exact match' do
|
||||
it 'does not return a nil entry in the array for the exact match' do
|
||||
match = Fabricate(:account, username: 'matchingusername')
|
||||
|
||||
account = Fabricate(:account, username: 'matchingusername')
|
||||
results = subject.call('match', nil, limit: 5)
|
||||
expect(results).to eq [match]
|
||||
|
||||
expect(results).to eq [account]
|
||||
end
|
||||
end
|
||||
|
||||
describe 'searching local and remote users' do
|
||||
describe "when only '@'" do
|
||||
before do
|
||||
allow(Account).to receive(:find_local)
|
||||
allow(Account).to receive(:search_for)
|
||||
subject.call('@', nil, limit: 10)
|
||||
end
|
||||
|
||||
it 'uses find_local with empty query to look for local accounts' do
|
||||
expect(Account).to have_received(:find_local).with('')
|
||||
end
|
||||
end
|
||||
|
||||
describe 'when no domain' do
|
||||
before do
|
||||
allow(Account).to receive(:find_local)
|
||||
allow(Account).to receive(:search_for)
|
||||
subject.call('one', nil, limit: 10)
|
||||
end
|
||||
|
||||
it 'uses find_local to look for local accounts' do
|
||||
expect(Account).to have_received(:find_local).with('one')
|
||||
end
|
||||
|
||||
it 'uses search_for to find matches' do
|
||||
expect(Account).to have_received(:search_for).with('one', 10, 0)
|
||||
end
|
||||
end
|
||||
|
||||
describe 'when there is a domain' do
|
||||
before do
|
||||
allow(Account).to receive(:find_remote)
|
||||
end
|
||||
|
||||
it 'uses find_remote to look for remote accounts' do
|
||||
subject.call('two@example.com', nil, limit: 10)
|
||||
expect(Account).to have_received(:find_remote).with('two', 'example.com')
|
||||
end
|
||||
|
||||
describe 'and there is no account provided' do
|
||||
it 'uses search_for to find matches' do
|
||||
allow(Account).to receive(:search_for)
|
||||
subject.call('two@example.com', nil, limit: 10, resolve: false)
|
||||
|
||||
expect(Account).to have_received(:search_for).with('two example.com', 10, 0)
|
||||
end
|
||||
end
|
||||
|
||||
describe 'and there is an account provided' do
|
||||
it 'uses advanced_search_for to find matches' do
|
||||
account = Fabricate(:account)
|
||||
allow(Account).to receive(:advanced_search_for)
|
||||
subject.call('two@example.com', account, limit: 10, resolve: false)
|
||||
|
||||
expect(Account).to have_received(:advanced_search_for).with('two example.com', account, 10, nil, 0)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe 'with an exact match' do
|
||||
it 'returns exact match first, and does not return duplicates' do
|
||||
partial = Fabricate(:account, username: 'exactness')
|
||||
exact = Fabricate(:account, username: 'exact')
|
||||
|
||||
results = subject.call('exact', nil, limit: 10)
|
||||
expect(results.size).to eq 2
|
||||
expect(results).to eq [exact, partial]
|
||||
end
|
||||
end
|
||||
|
||||
describe 'when there is a local domain' do
|
||||
context 'when there is a local domain' do
|
||||
around do |example|
|
||||
before = Rails.configuration.x.local_domain
|
||||
|
||||
example.run
|
||||
|
||||
Rails.configuration.x.local_domain = before
|
||||
end
|
||||
|
||||
it 'returns exact match first' do
|
||||
remote = Fabricate(:account, username: 'a', domain: 'remote', display_name: 'e')
|
||||
remote_too = Fabricate(:account, username: 'b', domain: 'remote', display_name: 'e')
|
||||
exact = Fabricate(:account, username: 'e')
|
||||
exact = Fabricate(:account, username: 'e')
|
||||
|
||||
Rails.configuration.x.local_domain = 'example.com'
|
||||
|
||||
results = subject.call('e@example.com', nil, limit: 2)
|
||||
|
||||
expect(results.size).to eq 2
|
||||
expect(results).to eq([exact, remote]).or eq([exact, remote_too])
|
||||
end
|
||||
end
|
||||
|
||||
describe 'when there is a domain but no exact match' do
|
||||
context 'when there is a domain but no exact match' do
|
||||
it 'follows the remote account when resolve is true' do
|
||||
service = double(call: nil)
|
||||
allow(ResolveAccountService).to receive(:new).and_return(service)
|
||||
|
@ -138,23 +68,21 @@ describe AccountSearchService, type: :service do
|
|||
end
|
||||
end
|
||||
|
||||
describe 'should not include suspended accounts' do
|
||||
it 'returns the fuzzy match first, and does not return suspended exacts' do
|
||||
partial = Fabricate(:account, username: 'exactness')
|
||||
exact = Fabricate(:account, username: 'exact', suspended: true)
|
||||
it 'returns the fuzzy match first, and does not return suspended exacts' do
|
||||
partial = Fabricate(:account, username: 'exactness')
|
||||
exact = Fabricate(:account, username: 'exact', suspended: true)
|
||||
results = subject.call('exact', nil, limit: 10)
|
||||
|
||||
results = subject.call('exact', nil, limit: 10)
|
||||
expect(results.size).to eq 1
|
||||
expect(results).to eq [partial]
|
||||
end
|
||||
expect(results.size).to eq 1
|
||||
expect(results).to eq [partial]
|
||||
end
|
||||
|
||||
it "does not return suspended remote accounts" do
|
||||
remote = Fabricate(:account, username: 'a', domain: 'remote', display_name: 'e', suspended: true)
|
||||
it "does not return suspended remote accounts" do
|
||||
remote = Fabricate(:account, username: 'a', domain: 'remote', display_name: 'e', suspended: true)
|
||||
results = subject.call('a@example.com', nil, limit: 2)
|
||||
|
||||
results = subject.call('a@example.com', nil, limit: 2)
|
||||
expect(results.size).to eq 0
|
||||
expect(results).to eq []
|
||||
end
|
||||
expect(results.size).to eq 0
|
||||
expect(results).to eq []
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue