From b5a9c6b3d292abc7e47d8a6f830f6b5589c04862 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Fri, 12 May 2017 17:46:44 +0200 Subject: [PATCH] Prepend reblogs' wrapper content with "RT @original_author", (#3013) so that when a reblog parse fails on another instance, it doesn't look like a misattributed/stolen text --- app/lib/atom_serializer.rb | 2 +- app/lib/formatter.rb | 45 ++++++++++++++-------- spec/lib/formatter_spec.rb | 79 ++++++++++++++++++++++++++------------ 3 files changed, 85 insertions(+), 41 deletions(-) diff --git a/app/lib/atom_serializer.rb b/app/lib/atom_serializer.rb index db6bc4dd2..288a9cfad 100644 --- a/app/lib/atom_serializer.rb +++ b/app/lib/atom_serializer.rb @@ -337,7 +337,7 @@ class AtomSerializer def serialize_status_attributes(entry, status) append_element(entry, 'summary', Formatter.instance.format(status.proper, :spoiler_text, false).to_str, 'xml:lang': status.language, type: 'html') if status.spoiler_text? - append_element(entry, 'content', Formatter.instance.format(status.proper).to_str, type: 'html', 'xml:lang': status.language) + append_element(entry, 'content', Formatter.instance.format(status).to_str, type: 'html', 'xml:lang': status.language) status.mentions.each do |mentioned| append_element(entry, 'link', nil, rel: :mentioned, 'ostatus:object-type': TagManager::TYPES[:person], href: TagManager.instance.uri_for(mentioned.account)) diff --git a/app/lib/formatter.rb b/app/lib/formatter.rb index 5a1b63c0a..43d23db96 100644 --- a/app/lib/formatter.rb +++ b/app/lib/formatter.rb @@ -10,13 +10,24 @@ class Formatter include ActionView::Helpers::TextHelper def format(status, attribute = :text, paragraphize = true) + if status.reblog? + prepend_reblog = status.reblog.account.acct + status = status.proper + else + prepend_reblog = false + end + raw_content = status.public_send(attribute) return '' if raw_content.blank? return reformat(raw_content) unless status.local? + linkable_accounts = status.mentions.map(&:account) + linkable_accounts << status.account + html = raw_content - html = encode_and_link_urls(html, status.mentions) + html = "RT @#{prepend_reblog} #{html}" if prepend_reblog + html = encode_and_link_urls(html, linkable_accounts) html = simple_format(html, {}, sanitize: false) if paragraphize html = html.delete("\n") @@ -52,7 +63,7 @@ class Formatter HTMLEntities.new.encode(html) end - def encode_and_link_urls(html, mentions = nil) + def encode_and_link_urls(html, accounts = nil) entities = Extractor.extract_entities_with_indices(html, extract_url_without_protocol: false) rewrite(html.dup, entities) do |entity| @@ -61,7 +72,7 @@ class Formatter elsif entity[:hashtag] link_to_hashtag(entity) elsif entity[:screen_name] - link_to_mention(entity, mentions) + link_to_mention(entity, accounts) end end end @@ -69,19 +80,21 @@ class Formatter def rewrite(text, entities) chars = text.to_s.to_char_a - # sort by start index + # Sort by start index entities = entities.sort_by do |entity| indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices] indices.first end result = [] + last_index = entities.reduce(0) do |index, entity| indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices] result << encode(chars[index...indices.first].join) result << yield(entity) indices.last end + result << encode(chars[last_index..-1].join) result.flatten.join @@ -89,26 +102,28 @@ class Formatter def link_to_url(entity) normalized_url = Addressable::URI.parse(entity[:url]).normalize - html_attrs = { - target: '_blank', - rel: 'nofollow noopener', - } + html_attrs = { target: '_blank', rel: 'nofollow noopener' } + Twitter::Autolink.send(:link_to_text, entity, link_html(entity[:url]), normalized_url, html_attrs) rescue Addressable::URI::InvalidURIError encode(entity[:url]) end - def link_to_mention(entity, mentions) + def link_to_mention(entity, linkable_accounts) acct = entity[:screen_name] - return link_to_account(acct) unless mentions - mention = mentions.find { |item| TagManager.instance.same_acct?(item.account.acct, acct) } - mention ? mention_html(mention.account) : "@#{acct}" + + return link_to_account(acct) unless linkable_accounts + + account = linkable_accounts.find { |item| TagManager.instance.same_acct?(item.acct, acct) } + account ? mention_html(account) : "@#{acct}" end def link_to_account(acct) username, domain = acct.split('@') - domain = nil if TagManager.instance.local_domain?(domain) + + domain = nil if TagManager.instance.local_domain?(domain) account = Account.find_remote(username, domain) + account ? mention_html(account) : "@#{acct}" end @@ -117,7 +132,7 @@ class Formatter end def link_html(url) - url = Addressable::URI.parse(url).display_uri.to_s + url = Addressable::URI.parse(url).display_uri.to_s prefix = url.match(/\Ahttps?:\/\/(www\.)?/).to_s text = url[prefix.length, 30] suffix = url[prefix.length + 30..-1] @@ -127,7 +142,7 @@ class Formatter end def hashtag_html(tag) - "##{tag}" + "##{tag}" end def mention_html(account) diff --git a/spec/lib/formatter_spec.rb b/spec/lib/formatter_spec.rb index 15b4e1d96..1db7f0bc9 100644 --- a/spec/lib/formatter_spec.rb +++ b/spec/lib/formatter_spec.rb @@ -7,38 +7,56 @@ RSpec.describe Formatter do let(:remote_status) { Fabricate(:status, text: ' Beep boop', uri: 'beepboop', account: account) } let(:local_text_with_mention) { "@#{account.username} @#{account.username}@example.com #{local_text}?x=@#{account.username} #hashtag" } - let(:local_status_with_mention) { Fabricate(:status, text: local_text_with_mention, - account: account, mentions: [Fabricate(:mention, account: account)]) } + + let(:local_status_with_mention) do + Fabricate( + :status, + text: local_text_with_mention, + account: account, + mentions: [Fabricate(:mention, account: account)] + ) + end describe '#format' do subject { Formatter.instance.format(local_status) } - it 'returns a string' do - expect(subject).to be_a String + context 'with standalone status' do + it 'returns a string' do + expect(subject).to be_a String + end + + it 'contains plain text' do + expect(subject).to match('Hello world') + end + + it 'contains a link' do + expect(subject).to match('google.com/') + end + + it 'contains a mention' do + result = Formatter.instance.format(local_status_with_mention) + expect(result).to match "@#{account.username}" + expect(result).to match %r{href=\"http://google.com/\?x=@#{account.username}} + expect(result).not_to match "href=\"https://example.com/@#{account.username}" + end + + it 'contains a hashtag' do + result = Formatter.instance.format(local_status_with_mention) + expect(result).to match('/tags/hashtag" class="mention hashtag" rel="tag">#hashtag') + end end - it 'contains plain text' do - expect(subject).to match('Hello world') - end + context 'with reblog' do + let(:local_status) { Fabricate(:status, account: account, reblog: Fabricate(:status, text: 'Hello world', account: account)) } - it 'contains a link' do - expect(subject).to match('google.com/') - end - - it 'contains a mention' do - result = Formatter.instance.format(local_status_with_mention) - expect(result).to match "@#{account.username}" - expect(result).to match %r{href=\"http://google.com/\?x=@#{account.username}} - expect(result).not_to match "href=\"https://example.com/@#{account.username}" - end - - it 'contains a hashtag' do - result = Formatter.instance.format(local_status_with_mention) - expect(result).to match("/tags/hashtag\" class=\"mention hashtag\">#hashtag") + it 'contains credit to original author' do + expect(subject).to include("RT @#{account.username} Hello world") + end end context 'matches a stand-alone medium URL' do let(:local_text) { 'https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4' } + it 'has valid url' do expect(subject).to include('href="https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4"') end @@ -46,6 +64,7 @@ RSpec.describe Formatter do context 'matches a stand-alone google URL' do let(:local_text) { 'http://google.com' } + it 'has valid url' do expect(subject).to include('href="http://google.com/"') end @@ -53,6 +72,7 @@ RSpec.describe Formatter do context 'matches a stand-alone IDN URL' do let(:local_text) { 'https://nic.みんな/' } + it 'has valid url' do expect(subject).to include('href="https://nic.xn--q9jyb4c/"') end @@ -64,6 +84,7 @@ RSpec.describe Formatter do context 'matches a URL without trailing period' do let(:local_text) { 'http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona. ' } + it 'has valid url' do expect(subject).to include('href="http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona"') end @@ -75,6 +96,7 @@ RSpec.describe Formatter do context 'matches a URL without exclamation point' do let(:local_text) { 'http://www.google.com!' } + it 'has valid url' do expect(subject).to include('href="http://www.google.com/"') end @@ -82,6 +104,7 @@ RSpec.describe Formatter do context 'matches a URL without single quote' do let(:local_text) { "http://www.google.com'" } + it 'has valid url' do expect(subject).to include('href="http://www.google.com/"') end @@ -89,6 +112,7 @@ RSpec.describe Formatter do context 'matches a URL without angle brackets' do let(:local_text) { 'http://www.google.com>' } + it 'has valid url' do expect(subject).to include('href="http://www.google.com/"') end @@ -96,6 +120,7 @@ RSpec.describe Formatter do context 'matches a URL with a query string' do let(:local_text) { 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink' } + it 'has valid url' do expect(subject).to include('href="https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink"') end @@ -103,20 +128,23 @@ RSpec.describe Formatter do context 'matches a URL with parenthesis in it' do let(:local_text) { 'https://en.wikipedia.org/wiki/Diaspora_(software)' } + it 'has valid url' do expect(subject).to include('href="https://en.wikipedia.org/wiki/Diaspora_(software)"') end end context 'contains html (script tag)' do - let(:local_text) { '' } - it 'has valid url' do - expect(subject).to match '

<script>alert("Hello")</script>

' - end + let(:local_text) { '' } + + it 'has valid url' do + expect(subject).to match '

<script>alert("Hello")</script>

' + end end context 'contains html (xss attack)' do let(:local_text) { %q{} } + it 'has valid url' do expect(subject).to match '

<img src="javascript:alert('XSS');">

' end @@ -124,6 +152,7 @@ RSpec.describe Formatter do context 'contains invalid URL' do let(:local_text) { 'http://www\.google\.com' } + it 'has valid url' do expect(subject).to eq '

http://www\.google\.com

' end