From e4af4898de8ab962bf39ced5d31d88e3fd510538 Mon Sep 17 00:00:00 2001 From: Eugen Date: Sun, 16 Apr 2017 20:32:17 +0200 Subject: [PATCH] Add language detection (#1772) * Add language detection via WhatLanguage and (de)serialization of it through Atom * Fix default language in ProcessFeedService * Re-add newline before 'react-rails' Gem to fix groupings Fixes Code Climate issue --- Gemfile | 1 + Gemfile.lock | 2 ++ app/lib/atom_serializer.rb | 4 ++-- app/services/post_status_service.rb | 5 +++++ app/services/process_feed_service.rb | 5 +++++ db/migrate/20170414132105_add_language_to_statuses.rb | 5 +++++ db/schema.rb | 5 +++-- 7 files changed, 23 insertions(+), 4 deletions(-) create mode 100644 db/migrate/20170414132105_add_language_to_statuses.rb diff --git a/Gemfile b/Gemfile index 12e783805..2971a7a29 100644 --- a/Gemfile +++ b/Gemfile @@ -56,6 +56,7 @@ gem 'sprockets-rails', :require => 'sprockets/railtie' gem 'statsd-instrument' gem 'twitter-text' gem 'tzinfo-data' +gem 'whatlanguage' gem 'react-rails' gem 'browserify-rails' diff --git a/Gemfile.lock b/Gemfile.lock index 97e5a6658..b0ef1c768 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -455,6 +455,7 @@ GEM websocket-driver (0.6.5) websocket-extensions (>= 0.1.0) websocket-extensions (0.1.2) + whatlanguage (1.0.6) xpath (2.0.0) nokogiri (~> 1.3) @@ -541,6 +542,7 @@ DEPENDENCIES tzinfo-data uglifier (>= 1.3.0) webmock + whatlanguage RUBY VERSION ruby 2.4.1p111 diff --git a/app/lib/atom_serializer.rb b/app/lib/atom_serializer.rb index 180b9bb82..6f1910440 100644 --- a/app/lib/atom_serializer.rb +++ b/app/lib/atom_serializer.rb @@ -327,8 +327,8 @@ class AtomSerializer end def serialize_status_attributes(entry, status) - append_element(entry, 'summary', status.spoiler_text) if status.spoiler_text? - append_element(entry, 'content', Formatter.instance.format(status.proper).to_str, type: 'html') + append_element(entry, 'summary', status.spoiler_text, 'xml:lang': status.language) if status.spoiler_text? + append_element(entry, 'content', Formatter.instance.format(status.proper).to_str, type: 'html', 'xml:lang': status.language) status.mentions.each do |mentioned| append_element(entry, 'link', nil, rel: :mentioned, 'ostatus:object-type': TagManager::TYPES[:person], href: TagManager.instance.uri_for(mentioned.account)) diff --git a/app/services/post_status_service.rb b/app/services/post_status_service.rb index 221aa42a3..d8202bea8 100644 --- a/app/services/post_status_service.rb +++ b/app/services/post_status_service.rb @@ -19,6 +19,7 @@ class PostStatusService < BaseService sensitive: options[:sensitive], spoiler_text: options[:spoiler_text] || '', visibility: options[:visibility], + language: detect_language(text), application: options[:application]) attach_media(status, media) @@ -51,6 +52,10 @@ class PostStatusService < BaseService media.update(status_id: status.id) end + def detect_language(text) + WhatLanguage.new(:all).language_iso(text) + end + def process_mentions_service @process_mentions_service ||= ProcessMentionsService.new end diff --git a/app/services/process_feed_service.rb b/app/services/process_feed_service.rb index 64a531e74..98d92f630 100644 --- a/app/services/process_feed_service.rb +++ b/app/services/process_feed_service.rb @@ -119,6 +119,7 @@ class ProcessFeedService < BaseService spoiler_text: content_warning(entry), created_at: published(entry), reply: thread?(entry), + language: content_language(entry), visibility: visibility_scope(entry) ) @@ -238,6 +239,10 @@ class ProcessFeedService < BaseService xml.at_xpath('./xmlns:content', xmlns: TagManager::XMLNS).content end + def content_language(xml = @xml) + xml.at_xpath('./xmlns:content', xmlns: TagManager::XMLNS)['xml:lang']&.presence || 'en' + end + def content_warning(xml = @xml) xml.at_xpath('./xmlns:summary', xmlns: TagManager::XMLNS)&.content || '' end diff --git a/db/migrate/20170414132105_add_language_to_statuses.rb b/db/migrate/20170414132105_add_language_to_statuses.rb new file mode 100644 index 000000000..59d51cb86 --- /dev/null +++ b/db/migrate/20170414132105_add_language_to_statuses.rb @@ -0,0 +1,5 @@ +class AddLanguageToStatuses < ActiveRecord::Migration[5.0] + def change + add_column :statuses, :language, :string, null: false, default: 'en' + end +end diff --git a/db/schema.rb b/db/schema.rb index 5f995ebda..62ff4207d 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20170414080609) do +ActiveRecord::Schema.define(version: 20170414132105) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -40,7 +40,6 @@ ActiveRecord::Schema.define(version: 20170414080609) do t.datetime "header_updated_at" t.string "avatar_remote_url" t.datetime "subscription_expires_at" - t.datetime "last_webfingered_at" t.boolean "silenced", default: false, null: false t.boolean "suspended", default: false, null: false t.boolean "locked", default: false, null: false @@ -48,6 +47,7 @@ ActiveRecord::Schema.define(version: 20170414080609) do t.integer "statuses_count", default: 0, null: false t.integer "followers_count", default: 0, null: false t.integer "following_count", default: 0, null: false + t.datetime "last_webfingered_at" t.index "(((setweight(to_tsvector('simple'::regconfig, (display_name)::text), 'A'::\"char\") || setweight(to_tsvector('simple'::regconfig, (username)::text), 'B'::\"char\")) || setweight(to_tsvector('simple'::regconfig, (COALESCE(domain, ''::character varying))::text), 'C'::\"char\")))", name: "search_index", using: :gin t.index "lower((username)::text), lower((domain)::text)", name: "index_accounts_on_username_and_domain_lower", using: :btree t.index ["url"], name: "index_accounts_on_url", using: :btree @@ -244,6 +244,7 @@ ActiveRecord::Schema.define(version: 20170414080609) do t.boolean "reply", default: false t.integer "favourites_count", default: 0, null: false t.integer "reblogs_count", default: 0, null: false + t.string "language", default: "en", null: false t.index ["account_id"], name: "index_statuses_on_account_id", using: :btree t.index ["in_reply_to_id"], name: "index_statuses_on_in_reply_to_id", using: :btree t.index ["reblog_of_id"], name: "index_statuses_on_reblog_of_id", using: :btree