forked from cybrespace/mastodon
Fix filterable_languages method of SettingsHelper (#4966)
This commit is contained in:
parent
efec507230
commit
48d77ea1eb
2
Gemfile
2
Gemfile
|
@ -25,7 +25,7 @@ gem 'bootsnap'
|
||||||
gem 'browser'
|
gem 'browser'
|
||||||
gem 'charlock_holmes', '~> 0.7.5'
|
gem 'charlock_holmes', '~> 0.7.5'
|
||||||
gem 'iso-639'
|
gem 'iso-639'
|
||||||
gem 'cld3', '~> 3.1'
|
gem 'cld3', '~> 3.2.0'
|
||||||
gem 'devise', '~> 4.2'
|
gem 'devise', '~> 4.2'
|
||||||
gem 'devise-two-factor', '~> 3.0'
|
gem 'devise-two-factor', '~> 3.0'
|
||||||
gem 'doorkeeper', '~> 4.2'
|
gem 'doorkeeper', '~> 4.2'
|
||||||
|
|
|
@ -110,7 +110,7 @@ GEM
|
||||||
activesupport
|
activesupport
|
||||||
charlock_holmes (0.7.5)
|
charlock_holmes (0.7.5)
|
||||||
chunky_png (1.3.8)
|
chunky_png (1.3.8)
|
||||||
cld3 (3.1.3)
|
cld3 (3.2.0)
|
||||||
ffi (>= 1.1.0, < 1.10.0)
|
ffi (>= 1.1.0, < 1.10.0)
|
||||||
climate_control (0.2.0)
|
climate_control (0.2.0)
|
||||||
cocaine (0.5.8)
|
cocaine (0.5.8)
|
||||||
|
@ -541,7 +541,7 @@ DEPENDENCIES
|
||||||
capistrano-yarn (~> 2.0)
|
capistrano-yarn (~> 2.0)
|
||||||
capybara (~> 2.14)
|
capybara (~> 2.14)
|
||||||
charlock_holmes (~> 0.7.5)
|
charlock_holmes (~> 0.7.5)
|
||||||
cld3 (~> 3.1)
|
cld3 (~> 3.2.0)
|
||||||
climate_control (~> 0.2)
|
climate_control (~> 0.2)
|
||||||
devise (~> 4.2)
|
devise (~> 4.2)
|
||||||
devise-two-factor (~> 3.0)
|
devise-two-factor (~> 3.0)
|
||||||
|
|
|
@ -41,7 +41,7 @@ module SettingsHelper
|
||||||
end
|
end
|
||||||
|
|
||||||
def filterable_languages
|
def filterable_languages
|
||||||
I18n.available_locales.map { |locale| locale.to_s.split('-').first.to_sym }.uniq
|
LanguageDetector.instance.language_names.select(&HUMAN_LOCALES.method(:key?))
|
||||||
end
|
end
|
||||||
|
|
||||||
def hash_to_object(hash)
|
def hash_to_object(hash)
|
||||||
|
|
|
@ -1,26 +1,31 @@
|
||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
class LanguageDetector
|
class LanguageDetector
|
||||||
attr_reader :text, :account
|
include Singleton
|
||||||
|
|
||||||
def initialize(text, account = nil)
|
def initialize
|
||||||
@text = text
|
|
||||||
@account = account
|
|
||||||
@identifier = CLD3::NNetLanguageIdentifier.new(1, 2048)
|
@identifier = CLD3::NNetLanguageIdentifier.new(1, 2048)
|
||||||
end
|
end
|
||||||
|
|
||||||
def to_iso_s
|
def detect(text, account)
|
||||||
detected_language_code || default_locale
|
detect_language_code(text) || default_locale(account)
|
||||||
end
|
end
|
||||||
|
|
||||||
def prepared_text
|
def language_names
|
||||||
simplified_text.strip
|
@language_names =
|
||||||
|
CLD3::TaskContextParams::LANGUAGE_NAMES.map { |name| iso6391(name.to_s).to_sym }
|
||||||
|
.uniq
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
def detected_language_code
|
def prepare_text(text)
|
||||||
iso6391(result.language).to_sym if detected_language_reliable?
|
simplify_text(text).strip
|
||||||
|
end
|
||||||
|
|
||||||
|
def detect_language_code(text)
|
||||||
|
result = @identifier.find_language(prepare_text(text))
|
||||||
|
iso6391(result.language.to_s).to_sym if result.reliable?
|
||||||
end
|
end
|
||||||
|
|
||||||
def iso6391(bcp47)
|
def iso6391(bcp47)
|
||||||
|
@ -32,15 +37,7 @@ class LanguageDetector
|
||||||
ISO_639.find(iso639).alpha2
|
ISO_639.find(iso639).alpha2
|
||||||
end
|
end
|
||||||
|
|
||||||
def result
|
def simplify_text(text)
|
||||||
@result ||= @identifier.find_language(prepared_text)
|
|
||||||
end
|
|
||||||
|
|
||||||
def detected_language_reliable?
|
|
||||||
result.reliable?
|
|
||||||
end
|
|
||||||
|
|
||||||
def simplified_text
|
|
||||||
text.dup.tap do |new_text|
|
text.dup.tap do |new_text|
|
||||||
new_text.gsub!(FetchLinkCardService::URL_PATTERN, '')
|
new_text.gsub!(FetchLinkCardService::URL_PATTERN, '')
|
||||||
new_text.gsub!(Account::MENTION_RE, '')
|
new_text.gsub!(Account::MENTION_RE, '')
|
||||||
|
@ -49,7 +46,7 @@ class LanguageDetector
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def default_locale
|
def default_locale(account)
|
||||||
account&.user_locale&.to_sym || nil
|
account.user_locale&.to_sym
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -28,7 +28,7 @@ class PostStatusService < BaseService
|
||||||
sensitive: options[:sensitive],
|
sensitive: options[:sensitive],
|
||||||
spoiler_text: options[:spoiler_text] || '',
|
spoiler_text: options[:spoiler_text] || '',
|
||||||
visibility: options[:visibility] || account.user&.setting_default_privacy,
|
visibility: options[:visibility] || account.user&.setting_default_privacy,
|
||||||
language: detect_language_for(text, account),
|
language: LanguageDetector.instance.detect(text, account),
|
||||||
application: options[:application])
|
application: options[:application])
|
||||||
|
|
||||||
attach_media(status, media)
|
attach_media(status, media)
|
||||||
|
@ -69,10 +69,6 @@ class PostStatusService < BaseService
|
||||||
media.update(status_id: status.id)
|
media.update(status_id: status.id)
|
||||||
end
|
end
|
||||||
|
|
||||||
def detect_language_for(text, account)
|
|
||||||
LanguageDetector.new(text, account).to_iso_s
|
|
||||||
end
|
|
||||||
|
|
||||||
def process_mentions_service
|
def process_mentions_service
|
||||||
@process_mentions_service ||= ProcessMentionsService.new
|
@process_mentions_service ||= ProcessMentionsService.new
|
||||||
end
|
end
|
||||||
|
|
|
@ -3,10 +3,10 @@
|
||||||
require 'rails_helper'
|
require 'rails_helper'
|
||||||
|
|
||||||
describe LanguageDetector do
|
describe LanguageDetector do
|
||||||
describe 'prepared_text' do
|
describe 'prepare_text' do
|
||||||
it 'returns unmodified string without special cases' do
|
it 'returns unmodified string without special cases' do
|
||||||
string = 'just a regular string'
|
string = 'just a regular string'
|
||||||
result = described_class.new(string).prepared_text
|
result = described_class.instance.send(:prepare_text, string)
|
||||||
|
|
||||||
expect(result).to eq string
|
expect(result).to eq string
|
||||||
end
|
end
|
||||||
|
@ -14,33 +14,35 @@ describe LanguageDetector do
|
||||||
it 'collapses spacing in strings' do
|
it 'collapses spacing in strings' do
|
||||||
string = 'The formatting in this is very odd'
|
string = 'The formatting in this is very odd'
|
||||||
|
|
||||||
result = described_class.new(string).prepared_text
|
result = described_class.instance.send(:prepare_text, string)
|
||||||
expect(result).to eq 'The formatting in this is very odd'
|
expect(result).to eq 'The formatting in this is very odd'
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'strips usernames from strings before detection' do
|
it 'strips usernames from strings before detection' do
|
||||||
string = '@username Yeah, very surreal...! also @friend'
|
string = '@username Yeah, very surreal...! also @friend'
|
||||||
|
|
||||||
result = described_class.new(string).prepared_text
|
result = described_class.instance.send(:prepare_text, string)
|
||||||
expect(result).to eq 'Yeah, very surreal...! also'
|
expect(result).to eq 'Yeah, very surreal...! also'
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'strips URLs from strings before detection' do
|
it 'strips URLs from strings before detection' do
|
||||||
string = 'Our website is https://example.com and also http://localhost.dev'
|
string = 'Our website is https://example.com and also http://localhost.dev'
|
||||||
|
|
||||||
result = described_class.new(string).prepared_text
|
result = described_class.instance.send(:prepare_text, string)
|
||||||
expect(result).to eq 'Our website is and also'
|
expect(result).to eq 'Our website is and also'
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'strips #hashtags from strings before detection' do
|
it 'strips #hashtags from strings before detection' do
|
||||||
string = 'Hey look at all the #animals and #fish'
|
string = 'Hey look at all the #animals and #fish'
|
||||||
|
|
||||||
result = described_class.new(string).prepared_text
|
result = described_class.instance.send(:prepare_text, string)
|
||||||
expect(result).to eq 'Hey look at all the and'
|
expect(result).to eq 'Hey look at all the and'
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
describe 'to_iso_s' do
|
describe 'detect' do
|
||||||
|
let(:account_without_user_locale) { Fabricate(:user, locale: nil).account }
|
||||||
|
|
||||||
it 'detects english language for basic strings' do
|
it 'detects english language for basic strings' do
|
||||||
strings = [
|
strings = [
|
||||||
"Hello and welcome to mastodon how are you today?",
|
"Hello and welcome to mastodon how are you today?",
|
||||||
|
@ -48,7 +50,7 @@ describe LanguageDetector do
|
||||||
"a lot of people just want to feel righteous all the time and that's all that matters",
|
"a lot of people just want to feel righteous all the time and that's all that matters",
|
||||||
]
|
]
|
||||||
strings.each do |string|
|
strings.each do |string|
|
||||||
result = described_class.new(string).to_iso_s
|
result = described_class.instance.detect(string, account_without_user_locale)
|
||||||
|
|
||||||
expect(result).to eq(:en), string
|
expect(result).to eq(:en), string
|
||||||
end
|
end
|
||||||
|
@ -56,14 +58,14 @@ describe LanguageDetector do
|
||||||
|
|
||||||
it 'detects spanish language' do
|
it 'detects spanish language' do
|
||||||
string = 'Obtener un Hola y bienvenidos a Mastodon'
|
string = 'Obtener un Hola y bienvenidos a Mastodon'
|
||||||
result = described_class.new(string).to_iso_s
|
result = described_class.instance.detect(string, account_without_user_locale)
|
||||||
|
|
||||||
expect(result).to eq :es
|
expect(result).to eq :es
|
||||||
end
|
end
|
||||||
|
|
||||||
describe 'when language can\'t be detected' do
|
describe 'when language can\'t be detected' do
|
||||||
it 'uses nil when sent an empty document' do
|
it 'uses nil when sent an empty document' do
|
||||||
result = described_class.new('').to_iso_s
|
result = described_class.instance.detect('', account_without_user_locale)
|
||||||
expect(result).to eq nil
|
expect(result).to eq nil
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -73,7 +75,7 @@ describe LanguageDetector do
|
||||||
cld_result = CLD3::NNetLanguageIdentifier.new(0, 2048).find_language(string)
|
cld_result = CLD3::NNetLanguageIdentifier.new(0, 2048).find_language(string)
|
||||||
expect(cld_result).not_to eq :en
|
expect(cld_result).not_to eq :en
|
||||||
|
|
||||||
result = described_class.new(string).to_iso_s
|
result = described_class.instance.detect(string, account_without_user_locale)
|
||||||
|
|
||||||
expect(result).to eq nil
|
expect(result).to eq nil
|
||||||
end
|
end
|
||||||
|
@ -82,14 +84,13 @@ describe LanguageDetector do
|
||||||
describe 'with an account' do
|
describe 'with an account' do
|
||||||
it 'uses the account locale when present' do
|
it 'uses the account locale when present' do
|
||||||
account = double(user_locale: 'fr')
|
account = double(user_locale: 'fr')
|
||||||
result = described_class.new('', account).to_iso_s
|
result = described_class.instance.detect('', account)
|
||||||
|
|
||||||
expect(result).to eq :fr
|
expect(result).to eq :fr
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'uses nil when account is present but has no locale' do
|
it 'uses nil when account is present but has no locale' do
|
||||||
account = double(user_locale: nil)
|
result = described_class.instance.detect('', account_without_user_locale)
|
||||||
result = described_class.new('', account).to_iso_s
|
|
||||||
|
|
||||||
expect(result).to eq nil
|
expect(result).to eq nil
|
||||||
end
|
end
|
||||||
|
@ -97,8 +98,7 @@ describe LanguageDetector do
|
||||||
|
|
||||||
describe 'with an `en` default locale' do
|
describe 'with an `en` default locale' do
|
||||||
it 'uses nil for undetectable string' do
|
it 'uses nil for undetectable string' do
|
||||||
string = ''
|
result = described_class.instance.detect('', account_without_user_locale)
|
||||||
result = described_class.new(string).to_iso_s
|
|
||||||
|
|
||||||
expect(result).to eq nil
|
expect(result).to eq nil
|
||||||
end
|
end
|
||||||
|
@ -114,7 +114,7 @@ describe LanguageDetector do
|
||||||
|
|
||||||
it 'uses nil for undetectable string' do
|
it 'uses nil for undetectable string' do
|
||||||
string = ''
|
string = ''
|
||||||
result = described_class.new(string).to_iso_s
|
result = described_class.instance.detect(string, account_without_user_locale)
|
||||||
|
|
||||||
expect(result).to eq nil
|
expect(result).to eq nil
|
||||||
end
|
end
|
||||||
|
|
|
@ -65,15 +65,12 @@ RSpec.describe PostStatusService do
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'creates a status with a language set' do
|
it 'creates a status with a language set' do
|
||||||
detector = double(to_iso_s: :en)
|
|
||||||
allow(LanguageDetector).to receive(:new).and_return(detector)
|
|
||||||
|
|
||||||
account = Fabricate(:account)
|
account = Fabricate(:account)
|
||||||
text = 'test status text'
|
text = 'This is an English text.'
|
||||||
|
|
||||||
subject.call(account, text)
|
status = subject.call(account, text)
|
||||||
|
|
||||||
expect(LanguageDetector).to have_received(:new).with(text, account)
|
expect(status.language).to eq 'en'
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'processes mentions' do
|
it 'processes mentions' do
|
||||||
|
|
Loading…
Reference in New Issue