Language improvements, replace whatlanguage with CLD (#2753)
* add failing en specs
* add cld2 gem
* Replace WhatLanguage with CLD
This commit is contained in:
		
							parent
							
								
									53384b0ffe
								
							
						
					
					
						commit
						8c5ad23b24
					
				
					 4 changed files with 32 additions and 12 deletions
				
			
		
							
								
								
									
										2
									
								
								Gemfile
									
										
									
									
									
								
							
							
						
						
									
										2
									
								
								Gemfile
									
										
									
									
									
								
							|  | @ -20,6 +20,7 @@ gem 'paperclip', '~> 5.1' | ||||||
| gem 'paperclip-av-transcoder' | gem 'paperclip-av-transcoder' | ||||||
| 
 | 
 | ||||||
| gem 'addressable' | gem 'addressable' | ||||||
|  | gem 'cld2', require: 'cld' | ||||||
| gem 'devise' | gem 'devise' | ||||||
| gem 'devise-two-factor' | gem 'devise-two-factor' | ||||||
| gem 'doorkeeper' | gem 'doorkeeper' | ||||||
|  | @ -56,7 +57,6 @@ gem 'statsd-instrument' | ||||||
| gem 'twitter-text' | gem 'twitter-text' | ||||||
| gem 'tzinfo-data' | gem 'tzinfo-data' | ||||||
| gem 'webpacker', '~>1.2' | gem 'webpacker', '~>1.2' | ||||||
| gem 'whatlanguage' |  | ||||||
| 
 | 
 | ||||||
| # For some reason the view specs start failing without this | # For some reason the view specs start failing without this | ||||||
| gem 'react-rails' | gem 'react-rails' | ||||||
|  |  | ||||||
|  | @ -102,6 +102,8 @@ GEM | ||||||
|       rack-test (>= 0.5.4) |       rack-test (>= 0.5.4) | ||||||
|       xpath (~> 2.0) |       xpath (~> 2.0) | ||||||
|     chunky_png (1.3.8) |     chunky_png (1.3.8) | ||||||
|  |     cld2 (1.0.3) | ||||||
|  |       ffi (~> 1.9.3) | ||||||
|     climate_control (0.1.0) |     climate_control (0.1.0) | ||||||
|     cocaine (0.5.8) |     cocaine (0.5.8) | ||||||
|       climate_control (>= 0.0.3, < 1.0) |       climate_control (>= 0.0.3, < 1.0) | ||||||
|  | @ -153,6 +155,7 @@ GEM | ||||||
|     faker (1.7.3) |     faker (1.7.3) | ||||||
|       i18n (~> 0.5) |       i18n (~> 0.5) | ||||||
|     fast_blank (1.0.0) |     fast_blank (1.0.0) | ||||||
|  |     ffi (1.9.18) | ||||||
|     fuubar (2.2.0) |     fuubar (2.2.0) | ||||||
|       rspec-core (~> 3.0) |       rspec-core (~> 3.0) | ||||||
|       ruby-progressbar (~> 1.4) |       ruby-progressbar (~> 1.4) | ||||||
|  | @ -463,7 +466,6 @@ GEM | ||||||
|     websocket-driver (0.6.5) |     websocket-driver (0.6.5) | ||||||
|       websocket-extensions (>= 0.1.0) |       websocket-extensions (>= 0.1.0) | ||||||
|     websocket-extensions (0.1.2) |     websocket-extensions (0.1.2) | ||||||
|     whatlanguage (1.0.6) |  | ||||||
|     xpath (2.0.0) |     xpath (2.0.0) | ||||||
|       nokogiri (~> 1.3) |       nokogiri (~> 1.3) | ||||||
| 
 | 
 | ||||||
|  | @ -484,6 +486,7 @@ DEPENDENCIES | ||||||
|   capistrano-rbenv |   capistrano-rbenv | ||||||
|   capistrano-yarn |   capistrano-yarn | ||||||
|   capybara |   capybara | ||||||
|  |   cld2 | ||||||
|   devise |   devise | ||||||
|   devise-two-factor |   devise-two-factor | ||||||
|   doorkeeper |   doorkeeper | ||||||
|  | @ -549,7 +552,6 @@ DEPENDENCIES | ||||||
|   uglifier (>= 1.3.0) |   uglifier (>= 1.3.0) | ||||||
|   webmock |   webmock | ||||||
|   webpacker (~> 1.2) |   webpacker (~> 1.2) | ||||||
|   whatlanguage |  | ||||||
| 
 | 
 | ||||||
| RUBY VERSION | RUBY VERSION | ||||||
|    ruby 2.4.1p111 |    ruby 2.4.1p111 | ||||||
|  |  | ||||||
|  | @ -9,11 +9,23 @@ class LanguageDetector | ||||||
|   end |   end | ||||||
| 
 | 
 | ||||||
|   def to_iso_s |   def to_iso_s | ||||||
|     WhatLanguage.new(:all).language_iso(text_without_urls) || default_locale.to_sym |     detected_language_code || default_locale.to_sym | ||||||
|   end |   end | ||||||
| 
 | 
 | ||||||
|   private |   private | ||||||
| 
 | 
 | ||||||
|  |   def detected_language_code | ||||||
|  |     detected_language[:code].to_sym if detected_language_reliable? | ||||||
|  |   end | ||||||
|  | 
 | ||||||
|  |   def detected_language | ||||||
|  |     @_detected_language ||= CLD.detect_language(text_without_urls) | ||||||
|  |   end | ||||||
|  | 
 | ||||||
|  |   def detected_language_reliable? | ||||||
|  |     detected_language[:reliable] | ||||||
|  |   end | ||||||
|  | 
 | ||||||
|   def text_without_urls |   def text_without_urls | ||||||
|     text.dup.tap do |new_text| |     text.dup.tap do |new_text| | ||||||
|       URI.extract(new_text).each do |url| |       URI.extract(new_text).each do |url| | ||||||
|  |  | ||||||
|  | @ -3,11 +3,17 @@ require 'rails_helper' | ||||||
| 
 | 
 | ||||||
| describe LanguageDetector do | describe LanguageDetector do | ||||||
|   describe 'to_iso_s' do |   describe 'to_iso_s' do | ||||||
|     it 'detects english language' do |     it 'detects english language for basic strings' do | ||||||
|       string = 'Hello and welcome to mastodon' |       strings = [ | ||||||
|  |         "Hello and welcome to mastodon", | ||||||
|  |         "I'd rather not!", | ||||||
|  |         "a lot of people just want to feel righteous all the time and that's all that matters", | ||||||
|  |       ] | ||||||
|  |       strings.each do |string| | ||||||
|         result = described_class.new(string).to_iso_s |         result = described_class.new(string).to_iso_s | ||||||
| 
 | 
 | ||||||
|       expect(result).to eq :en |         expect(result).to eq(:en), string | ||||||
|  |       end | ||||||
|     end |     end | ||||||
| 
 | 
 | ||||||
|     it 'detects spanish language' do |     it 'detects spanish language' do | ||||||
|  | @ -19,15 +25,15 @@ describe LanguageDetector do | ||||||
| 
 | 
 | ||||||
|     describe 'when language can\'t be detected' do |     describe 'when language can\'t be detected' do | ||||||
|       it 'confirm language engine cant detect' do |       it 'confirm language engine cant detect' do | ||||||
|         result = WhatLanguage.new(:all).language_iso('') |         result = CLD.detect_language('') | ||||||
|         expect(result).to be_nil |         expect(result[:reliable]).to be false | ||||||
|       end |       end | ||||||
| 
 | 
 | ||||||
|       describe 'because of a URL' do |       describe 'because of a URL' do | ||||||
|         it 'uses default locale when sent just a URL' do |         it 'uses default locale when sent just a URL' do | ||||||
|           string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4' |           string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4' | ||||||
|           wl_result = WhatLanguage.new(:all).language_iso(string) |           cld_result = CLD.detect_language(string)[:code] | ||||||
|           expect(wl_result).not_to eq :en |           expect(cld_result).not_to eq :en | ||||||
| 
 | 
 | ||||||
|           result = described_class.new(string).to_iso_s |           result = described_class.new(string).to_iso_s | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		
		Reference in a new issue