Fix html escape characters in the URL (#2138)
* fix character escaping in URL
* add tests
* put a comma after the last item
* add HTML escape test
This commit is contained in:
		
							parent
							
								
									3572f4423f
								
							
						
					
					
						commit
						70891a99a9
					
				
					 2 changed files with 94 additions and 29 deletions
				
			
		|  | @ -13,10 +13,9 @@ class Formatter | ||||||
|     return reformat(status.content) unless status.local? |     return reformat(status.content) unless status.local? | ||||||
| 
 | 
 | ||||||
|     html = status.text |     html = status.text | ||||||
|     html = encode(html) |     html = encode_and_link_urls(html) | ||||||
|     html = simple_format(html, {}, sanitize: false) |     html = simple_format(html, {}, sanitize: false) | ||||||
|     html = html.delete("\n") |     html = html.delete("\n") | ||||||
|     html = link_urls(html) |  | ||||||
|     html = link_mentions(html, status.mentions) |     html = link_mentions(html, status.mentions) | ||||||
|     html = link_hashtags(html) |     html = link_hashtags(html) | ||||||
| 
 | 
 | ||||||
|  | @ -35,8 +34,7 @@ class Formatter | ||||||
|   def simplified_format(account) |   def simplified_format(account) | ||||||
|     return reformat(account.note) unless account.local? |     return reformat(account.note) unless account.local? | ||||||
| 
 | 
 | ||||||
|     html = encode(account.note) |     html = encode_and_link_urls(account.note) | ||||||
|     html = link_urls(html) |  | ||||||
|     html = link_accounts(html) |     html = link_accounts(html) | ||||||
|     html = link_hashtags(html) |     html = link_hashtags(html) | ||||||
| 
 | 
 | ||||||
|  | @ -49,6 +47,26 @@ class Formatter | ||||||
|     HTMLEntities.new.encode(html) |     HTMLEntities.new.encode(html) | ||||||
|   end |   end | ||||||
| 
 | 
 | ||||||
|  |   def encode_and_link_urls(html) | ||||||
|  |     entities = Twitter::Extractor.extract_urls_with_indices(html, extract_url_without_protocol: false) | ||||||
|  |     entities = entities.sort_by { |entity| entity[:indices].first } | ||||||
|  | 
 | ||||||
|  |     chars = html.to_s.to_char_a | ||||||
|  |     html_attrs = { | ||||||
|  |       target: '_blank', | ||||||
|  |       rel: 'nofollow noopener', | ||||||
|  |     } | ||||||
|  |     result = '' | ||||||
|  | 
 | ||||||
|  |     last_index = entities.reduce(0) do |index, entity| | ||||||
|  |       indices = entity[:indices] | ||||||
|  |       result += encode(chars[index...indices.first].join) | ||||||
|  |       result += Twitter::Autolink.send(:link_to_text, entity, link_html(entity[:url]), entity[:url], html_attrs) | ||||||
|  |       indices.last | ||||||
|  |     end | ||||||
|  |     result += encode(chars[last_index..-1].join) | ||||||
|  |   end | ||||||
|  | 
 | ||||||
|   def link_urls(html) |   def link_urls(html) | ||||||
|     Twitter::Autolink.auto_link_urls(html, url_target: '_blank', |     Twitter::Autolink.auto_link_urls(html, url_target: '_blank', | ||||||
|                                            link_attribute_block: lambda { |_, a| a[:rel] << ' noopener' }, |                                            link_attribute_block: lambda { |_, a| a[:rel] << ' noopener' }, | ||||||
|  |  | ||||||
|  | @ -2,7 +2,8 @@ require 'rails_helper' | ||||||
| 
 | 
 | ||||||
| RSpec.describe Formatter do | RSpec.describe Formatter do | ||||||
|   let(:account)       { Fabricate(:account, username: 'alice') } |   let(:account)       { Fabricate(:account, username: 'alice') } | ||||||
|   let(:local_status)  { Fabricate(:status, text: 'Hello world http://google.com', account: account) } |   let(:local_text)    { 'Hello world http://google.com' } | ||||||
|  |   let(:local_status)  { Fabricate(:status, text: local_text, account: account) } | ||||||
|   let(:remote_status) { Fabricate(:status, text: '<script>alert("Hello")</script> Beep boop', uri: 'beepboop', account: account) } |   let(:remote_status) { Fabricate(:status, text: '<script>alert("Hello")</script> Beep boop', uri: 'beepboop', account: account) } | ||||||
| 
 | 
 | ||||||
|   describe '#format' do |   describe '#format' do | ||||||
|  | @ -20,35 +21,81 @@ RSpec.describe Formatter do | ||||||
|       expect(subject).to match('<a href="http://google.com" rel="nofollow noopener" target="_blank"><span class="invisible">http://</span><span class="">google.com</span><span class="invisible"></span></a>') |       expect(subject).to match('<a href="http://google.com" rel="nofollow noopener" target="_blank"><span class="invisible">http://</span><span class="">google.com</span><span class="invisible"></span></a>') | ||||||
|     end |     end | ||||||
| 
 | 
 | ||||||
|  |     context 'matches a stand-alone medium URL' do | ||||||
|  |       let(:local_text) { 'https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4' } | ||||||
|  |       it 'has valid url' do | ||||||
|  |         expect(subject).to include('href="https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4"') | ||||||
|  |       end | ||||||
|  |     end | ||||||
|  | 
 | ||||||
|  |     context 'matches a stand-alone google URL' do | ||||||
|  |       let(:local_text) { 'http://google.com' } | ||||||
|  |       it 'has valid url' do | ||||||
|  |         expect(subject).to include('href="http://google.com"') | ||||||
|  |       end | ||||||
|  |     end | ||||||
|  | 
 | ||||||
|  |     context 'matches a URL without trailing period' do | ||||||
|  |       let(:local_text) { 'http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona. ' } | ||||||
|  |       it 'has valid url' do | ||||||
|  |         expect(subject).to include('href="http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona"') | ||||||
|  |       end | ||||||
|  |     end | ||||||
|  | 
 | ||||||
| =begin | =begin | ||||||
|     it 'matches a stand-alone medium URL' do |  | ||||||
|       expect(subject.match('https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4')[0]).to eq 'https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4' |  | ||||||
|     end |  | ||||||
| 
 |  | ||||||
|     it 'matches a stand-alone google URL' do |  | ||||||
|       expect(subject.match('http://google.com')[0]).to eq 'http://google.com' |  | ||||||
|     end |  | ||||||
| 
 |  | ||||||
|     it 'matches a URL without trailing period' do |  | ||||||
|       expect(subject.match('http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona. ')[0]).to eq 'http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona' |  | ||||||
|     end |  | ||||||
| 
 |  | ||||||
|     it 'matches a URL without closing paranthesis' do |     it 'matches a URL without closing paranthesis' do | ||||||
|       expect(subject.match('(http://google.com/)')[0]).to eq 'http://google.com' |       expect(subject.match('(http://google.com/)')[0]).to eq 'http://google.com' | ||||||
|     end |     end | ||||||
| 
 |  | ||||||
|     it 'matches a URL without exclamation point' do |  | ||||||
|       expect(subject.match('http://www.google.com! ')[0]).to eq 'http://www.google.com' |  | ||||||
|     end |  | ||||||
| 
 |  | ||||||
|     it 'matches a URL with a query string' do |  | ||||||
|       expect(subject.match('https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink')[0]).to eq 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink' |  | ||||||
|     end |  | ||||||
| 
 |  | ||||||
|     it 'matches a URL with parenthesis in it' do |  | ||||||
|       expect(subject.match('https://en.wikipedia.org/wiki/Diaspora_(software)')[0]).to eq 'https://en.wikipedia.org/wiki/Diaspora_(software)' |  | ||||||
|     end |  | ||||||
| =end | =end | ||||||
|  | 
 | ||||||
|  |     context 'matches a URL without exclamation point' do | ||||||
|  |       let(:local_text) { 'http://www.google.com!' } | ||||||
|  |       it 'has valid url' do | ||||||
|  |         expect(subject).to include('href="http://www.google.com"') | ||||||
|  |       end | ||||||
|  |     end | ||||||
|  | 
 | ||||||
|  |     context 'matches a URL without single quote' do | ||||||
|  |       let(:local_text) { "http://www.google.com'" } | ||||||
|  |       it 'has valid url' do | ||||||
|  |         expect(subject).to include('href="http://www.google.com"') | ||||||
|  |       end | ||||||
|  |     end | ||||||
|  | 
 | ||||||
|  |     context 'matches a URL without angle brackets' do | ||||||
|  |       let(:local_text) { 'http://www.google.com>' } | ||||||
|  |       it 'has valid url' do | ||||||
|  |         expect(subject).to include('href="http://www.google.com"') | ||||||
|  |       end | ||||||
|  |     end | ||||||
|  | 
 | ||||||
|  |     context 'matches a URL with a query string' do | ||||||
|  |       let(:local_text) { 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink' } | ||||||
|  |       it 'has valid url' do | ||||||
|  |         expect(subject).to include('href="https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&amp;q=autolink"') | ||||||
|  |       end | ||||||
|  |     end | ||||||
|  | 
 | ||||||
|  |     context 'matches a URL with parenthesis in it' do | ||||||
|  |       let(:local_text) { 'https://en.wikipedia.org/wiki/Diaspora_(software)' } | ||||||
|  |       it 'has valid url' do | ||||||
|  |         expect(subject).to include('href="https://en.wikipedia.org/wiki/Diaspora_(software)"') | ||||||
|  |       end | ||||||
|  |     end | ||||||
|  | 
 | ||||||
|  |     context 'contains html (script tag)' do | ||||||
|  |         let(:local_text) { '<script>alert("Hello")</script>' } | ||||||
|  |         it 'has valid url' do | ||||||
|  |             expect(subject).to match '<p>&lt;script&gt;alert(&quot;Hello&quot;)&lt;/script&gt;</p>' | ||||||
|  |         end | ||||||
|  |     end | ||||||
|  | 
 | ||||||
|  |     context 'contains html (xss attack)' do | ||||||
|  |       let(:local_text) { %q{<img src="javascript:alert('XSS');">} } | ||||||
|  |       it 'has valid url' do | ||||||
|  |         expect(subject).to match '<p>&lt;img src=&quot;javascript:alert(&apos;XSS&apos;);&quot;&gt;</p>' | ||||||
|  |       end | ||||||
|  |     end | ||||||
|   end |   end | ||||||
| 
 | 
 | ||||||
|   describe '#reformat' do |   describe '#reformat' do | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		
		Reference in a new issue