forked from cybrespace/mastodon
		
	Fix URL linkifier grabbing full-width spaces and quotations (#9997)
Fix #9993 Fix #5654
This commit is contained in:
		
							parent
							
								
									a666d1e7ed
								
							
						
					
					
						commit
						016ad37bc8
					
				
					 3 changed files with 55 additions and 5 deletions
				
			
		| 
						 | 
				
			
			@ -199,12 +199,22 @@ class Formatter
 | 
			
		|||
    result.flatten.join
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  UNICODE_ESCAPE_BLACKLIST_RE = /\p{Z}|\p{P}/
 | 
			
		||||
 | 
			
		||||
  def utf8_friendly_extractor(text, options = {})
 | 
			
		||||
    old_to_new_index = [0]
 | 
			
		||||
 | 
			
		||||
    escaped = text.chars.map do |c|
 | 
			
		||||
      output = c.ord.to_s(16).length > 2 ? CGI.escape(c) : c
 | 
			
		||||
      output = begin
 | 
			
		||||
        if c.ord.to_s(16).length > 2 && UNICODE_ESCAPE_BLACKLIST_RE.match(c).nil?
 | 
			
		||||
          CGI.escape(c)
 | 
			
		||||
        else
 | 
			
		||||
          c
 | 
			
		||||
        end
 | 
			
		||||
      end
 | 
			
		||||
 | 
			
		||||
      old_to_new_index << old_to_new_index.last + output.length
 | 
			
		||||
 | 
			
		||||
      output
 | 
			
		||||
    end.join
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,7 +1,7 @@
 | 
			
		|||
module Twitter
 | 
			
		||||
  class Regex
 | 
			
		||||
    REGEXEN[:valid_general_url_path_chars] = /[^\p{White_Space}\(\)\?]/iou
 | 
			
		||||
    REGEXEN[:valid_url_path_ending_chars] = /[^\p{White_Space}\(\)\?!\*';:=\,\.\$%\[\]~&\|@]|(?:#{REGEXEN[:valid_url_balanced_parens]})/iou
 | 
			
		||||
    REGEXEN[:valid_general_url_path_chars] = /[^\p{White_Space}<>\(\)\?]/iou
 | 
			
		||||
    REGEXEN[:valid_url_path_ending_chars] = /[^\p{White_Space}\(\)\?!\*"'「」<>;:=\,\.\$%\[\]~&\|@]|(?:#{REGEXEN[:valid_url_balanced_parens]})/iou
 | 
			
		||||
    REGEXEN[:valid_url_balanced_parens] = /
 | 
			
		||||
      \(
 | 
			
		||||
        (?:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -115,6 +115,22 @@ RSpec.describe Formatter do
 | 
			
		|||
      end
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    context 'given a URL in quotation marks' do
 | 
			
		||||
      let(:text) { '"https://example.com/"' }
 | 
			
		||||
 | 
			
		||||
      it 'does not match the quotation marks' do
 | 
			
		||||
        is_expected.to include 'href="https://example.com/"'
 | 
			
		||||
      end
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    context 'given a URL in angle brackets' do
 | 
			
		||||
      let(:text) { '<https://example.com/>' }
 | 
			
		||||
 | 
			
		||||
      it 'does not match the angle brackets' do
 | 
			
		||||
        is_expected.to include 'href="https://example.com/"'
 | 
			
		||||
      end
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    context 'given a URL with Japanese path string' do
 | 
			
		||||
      let(:text) { 'https://ja.wikipedia.org/wiki/日本' }
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -131,6 +147,22 @@ RSpec.describe Formatter do
 | 
			
		|||
      end
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    context 'given a URL with a full-width space' do
 | 
			
		||||
      let(:text) { 'https://example.com/　abc123' }
 | 
			
		||||
 | 
			
		||||
      it 'does not match the full-width space' do
 | 
			
		||||
        is_expected.to include 'href="https://example.com/"'
 | 
			
		||||
      end
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    context 'given a URL in Japanese quotation marks' do
 | 
			
		||||
      let(:text) { '「[https://example.org/」' }
 | 
			
		||||
 | 
			
		||||
      it 'does not match the quotation marks' do
 | 
			
		||||
        is_expected.to include 'href="https://example.org/"'
 | 
			
		||||
      end
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    context 'given a URL with Simplified Chinese path string' do
 | 
			
		||||
      let(:text) { 'https://baike.baidu.com/item/中华人民共和国' }
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -150,7 +182,11 @@ RSpec.describe Formatter do
 | 
			
		|||
    context 'given a URL containing unsafe code (XSS attack, visible part)' do
 | 
			
		||||
      let(:text) { %q{http://example.com/b<del>b</del>} }
 | 
			
		||||
 | 
			
		||||
      it 'escapes the HTML in the URL' do
 | 
			
		||||
      it 'does not include the HTML in the URL' do
 | 
			
		||||
        is_expected.to include '"http://example.com/b"'
 | 
			
		||||
      end
 | 
			
		||||
 | 
			
		||||
      it 'escapes the HTML' do
 | 
			
		||||
        is_expected.to include '&lt;del&gt;b&lt;/del&gt;'
 | 
			
		||||
      end
 | 
			
		||||
    end
 | 
			
		||||
| 
						 | 
				
			
			@ -158,7 +194,11 @@ RSpec.describe Formatter do
 | 
			
		|||
    context 'given a URL containing unsafe code (XSS attack, invisible part)' do
 | 
			
		||||
      let(:text) { %q{http://example.com/blahblahblahblah/a<script>alert("Hello")</script>} }
 | 
			
		||||
 | 
			
		||||
      it 'escapes the HTML in the URL' do
 | 
			
		||||
      it 'does not include the HTML in the URL' do
 | 
			
		||||
        is_expected.to include '"http://example.com/blahblahblahblah/a"'
 | 
			
		||||
      end
 | 
			
		||||
 | 
			
		||||
      it 'escapes the HTML' do
 | 
			
		||||
        is_expected.to include '&lt;script&gt;alert(&quot;Hello&quot;)&lt;/script&gt;'
 | 
			
		||||
      end
 | 
			
		||||
    end
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		
		Reference in a new issue