Fix URL linkifier grabbing full-width spaces and quotations (#9997)
Fix #9993 Fix #5654
This commit is contained in:
		
							parent
							
								
									a666d1e7ed
								
							
						
					
					
						commit
						016ad37bc8
					
				
					 3 changed files with 55 additions and 5 deletions
				
			
		|  | @@ -199,12 +199,22 @@ class Formatter | |||
|     result.flatten.join | ||||
|   end | ||||
| 
 | ||||
|   UNICODE_ESCAPE_BLACKLIST_RE = /\p{Z}|\p{P}/ | ||||
| 
 | ||||
|   def utf8_friendly_extractor(text, options = {}) | ||||
|     old_to_new_index = [0] | ||||
| 
 | ||||
|     escaped = text.chars.map do |c| | ||||
|       output = c.ord.to_s(16).length > 2 ? CGI.escape(c) : c | ||||
|       output = begin | ||||
|         if c.ord.to_s(16).length > 2 && UNICODE_ESCAPE_BLACKLIST_RE.match(c).nil? | ||||
|           CGI.escape(c) | ||||
|         else | ||||
|           c | ||||
|         end | ||||
|       end | ||||
| 
 | ||||
|       old_to_new_index << old_to_new_index.last + output.length | ||||
| 
 | ||||
|       output | ||||
|     end.join | ||||
| 
 | ||||
|  |  | |||
|  | @@ -1,7 +1,7 @@ | |||
| module Twitter | ||||
|   class Regex | ||||
|     REGEXEN[:valid_general_url_path_chars] = /[^\p{White_Space}\(\)\?]/iou | ||||
|     REGEXEN[:valid_url_path_ending_chars] = /[^\p{White_Space}\(\)\?!\*';:=\,\.\$%\[\]~&\|@]|(?:#{REGEXEN[:valid_url_balanced_parens]})/iou | ||||
|     REGEXEN[:valid_general_url_path_chars] = /[^\p{White_Space}<>\(\)\?]/iou | ||||
|     REGEXEN[:valid_url_path_ending_chars] = /[^\p{White_Space}\(\)\?!\*"'「」<>;:=\,\.\$%\[\]~&\|@]|(?:#{REGEXEN[:valid_url_balanced_parens]})/iou | ||||
|     REGEXEN[:valid_url_balanced_parens] = / | ||||
|       \( | ||||
|         (?: | ||||
|  |  | |||
|  | @@ -115,6 +115,22 @@ RSpec.describe Formatter do | |||
|       end | ||||
|     end | ||||
| 
 | ||||
|     context 'given a URL in quotation marks' do | ||||
|       let(:text) { '"https://example.com/"' } | ||||
| 
 | ||||
|       it 'does not match the quotation marks' do | ||||
|         is_expected.to include 'href="https://example.com/"' | ||||
|       end | ||||
|     end | ||||
| 
 | ||||
|     context 'given a URL in angle brackets' do | ||||
|       let(:text) { '<https://example.com/>' } | ||||
| 
 | ||||
|       it 'does not match the angle brackets' do | ||||
|         is_expected.to include 'href="https://example.com/"' | ||||
|       end | ||||
|     end | ||||
| 
 | ||||
|     context 'given a URL with Japanese path string' do | ||||
|       let(:text) { 'https://ja.wikipedia.org/wiki/日本' } | ||||
| 
 | ||||
|  | @@ -131,6 +147,22 @@ RSpec.describe Formatter do | |||
|       end | ||||
|     end | ||||
| 
 | ||||
|     context 'given a URL with a full-width space' do | ||||
|       let(:text) { 'https://example.com/　abc123' } | ||||
| 
 | ||||
|       it 'does not match the full-width space' do | ||||
|         is_expected.to include 'href="https://example.com/"' | ||||
|       end | ||||
|     end | ||||
| 
 | ||||
|     context 'given a URL in Japanese quotation marks' do | ||||
|       let(:text) { '「[https://example.org/」' } | ||||
| 
 | ||||
|       it 'does not match the quotation marks' do | ||||
|         is_expected.to include 'href="https://example.org/"' | ||||
|       end | ||||
|     end | ||||
| 
 | ||||
|     context 'given a URL with Simplified Chinese path string' do | ||||
|       let(:text) { 'https://baike.baidu.com/item/中华人民共和国' } | ||||
| 
 | ||||
|  | @@ -150,7 +182,11 @@ RSpec.describe Formatter do | |||
|     context 'given a URL containing unsafe code (XSS attack, visible part)' do | ||||
|       let(:text) { %q{http://example.com/b<del>b</del>} } | ||||
| 
 | ||||
|       it 'escapes the HTML in the URL' do | ||||
|       it 'does not include the HTML in the URL' do | ||||
|         is_expected.to include '"http://example.com/b"' | ||||
|       end | ||||
| 
 | ||||
|       it 'escapes the HTML' do | ||||
|         is_expected.to include '&lt;del&gt;b&lt;/del&gt;' | ||||
|       end | ||||
|     end | ||||
|  | @@ -158,7 +194,11 @@ RSpec.describe Formatter do | |||
|     context 'given a URL containing unsafe code (XSS attack, invisible part)' do | ||||
|       let(:text) { %q{http://example.com/blahblahblahblah/a<script>alert("Hello")</script>} } | ||||
| 
 | ||||
|       it 'escapes the HTML in the URL' do | ||||
|       it 'does not include the HTML in the URL' do | ||||
|         is_expected.to include '"http://example.com/blahblahblahblah/a"' | ||||
|       end | ||||
| 
 | ||||
|       it 'escapes the HTML' do | ||||
|         is_expected.to include '&lt;script&gt;alert(&quot;Hello&quot;)&lt;/script&gt;' | ||||
|       end | ||||
|     end | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue