forked from cybrespace/mastodon
		
	Faster emojify() by avoiding str.replace() entirely (#4049)
This commit is contained in:
		
							parent
							
								
									331f0953e9
								
							
						
					
					
						commit
						e282580101
					
				
					 4 changed files with 71 additions and 37 deletions
				
			
		| 
						 | 
				
			
			@ -1,60 +1,55 @@
 | 
			
		|||
import emojione from 'emojione';
 | 
			
		||||
import Trie from 'substring-trie';
 | 
			
		||||
 | 
			
		||||
const toImage = str => shortnameToImage(unicodeToImage(str));
 | 
			
		||||
const mappedUnicode = emojione.mapUnicodeToShort();
 | 
			
		||||
const trie = new Trie(Object.keys(emojione.jsEscapeMap));
 | 
			
		||||
 | 
			
		||||
const unicodeToImage = str => {
 | 
			
		||||
  const mappedUnicode = emojione.mapUnicodeToShort();
 | 
			
		||||
 | 
			
		||||
  return str.replace(emojione.regUnicode, unicodeChar => {
 | 
			
		||||
    if (typeof unicodeChar === 'undefined' || unicodeChar === '' || !(unicodeChar in emojione.jsEscapeMap)) {
 | 
			
		||||
      return unicodeChar;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const unicode  = emojione.jsEscapeMap[unicodeChar];
 | 
			
		||||
    const short    = mappedUnicode[unicode];
 | 
			
		||||
    const filename = emojione.emojioneList[short].fname;
 | 
			
		||||
    const alt      = emojione.convert(unicode.toUpperCase());
 | 
			
		||||
 | 
			
		||||
    return `<img draggable="false" class="emojione" alt="${alt}" title="${short}" src="/emoji/${filename}.svg" />`;
 | 
			
		||||
  });
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
const shortnameToImage = str => {
 | 
			
		||||
  // This walks through the string from end to start, ignoring any tags (<p>, <br>, etc.)
 | 
			
		||||
  // and replacing valid shortnames like :smile: and :wink: that _aren't_ within
 | 
			
		||||
  // tags with an <img> version.
 | 
			
		||||
  // The goal is to be the same as an emojione.regShortNames replacement, but faster.
 | 
			
		||||
  // We go backwards so that we can replace substrings as we go without shifting the indices still to be scanned.
 | 
			
		||||
  let i = str.length;
 | 
			
		||||
function emojify(str) {
 | 
			
		||||
  // This walks through the string from start to end, ignoring any tags (<p>, <br>, etc.)
 | 
			
		||||
  // and replacing valid shortnames like :smile: and :wink: as well as unicode strings
 | 
			
		||||
  // that _aren't_ within tags with an <img> version.
 | 
			
		||||
  // The goal is to produce the same output as an emojione.regShortNames/regUnicode replacement, but faster.
 | 
			
		||||
  let i = -1;
 | 
			
		||||
  let insideTag = false;
 | 
			
		||||
  let insideShortname = false;
 | 
			
		||||
  let shortnameEndIndex = -1;
 | 
			
		||||
  while (i--) {
 | 
			
		||||
  let shortnameStartIndex = -1;
 | 
			
		||||
  let match;
 | 
			
		||||
  while (++i < str.length) {
 | 
			
		||||
    const char = str.charAt(i);
 | 
			
		||||
    if (insideShortname && char === ':') {
 | 
			
		||||
      const shortname = str.substring(i, shortnameEndIndex + 1);
 | 
			
		||||
      const shortname = str.substring(shortnameStartIndex, i + 1);
 | 
			
		||||
      if (shortname in emojione.emojioneList) {
 | 
			
		||||
        const unicode = emojione.emojioneList[shortname].unicode[emojione.emojioneList[shortname].unicode.length - 1];
 | 
			
		||||
        const alt = emojione.convert(unicode.toUpperCase());
 | 
			
		||||
        const replacement = `<img draggable="false" class="emojione" alt="${alt}" title="${shortname}" src="/emoji/${unicode}.svg" />`;
 | 
			
		||||
        str = str.substring(0, i) + replacement + str.substring(shortnameEndIndex + 1);
 | 
			
		||||
        str = str.substring(0, shortnameStartIndex) + replacement + str.substring(i + 1);
 | 
			
		||||
        i += (replacement.length - shortname.length - 1); // jump ahead the length we've added to the string
 | 
			
		||||
      } else {
 | 
			
		||||
        i++; // stray colon, try again
 | 
			
		||||
        i--; // stray colon, try again
 | 
			
		||||
      }
 | 
			
		||||
      insideShortname = false;
 | 
			
		||||
    } else if (insideTag && char === '<') {
 | 
			
		||||
    } else if (insideTag && char === '>') {
 | 
			
		||||
      insideTag = false;
 | 
			
		||||
    } else if (char === '>') {
 | 
			
		||||
    } else if (char === '<') {
 | 
			
		||||
      insideTag = true;
 | 
			
		||||
      insideShortname = false;
 | 
			
		||||
    } else if (!insideTag && char === ':') {
 | 
			
		||||
      insideShortname = true;
 | 
			
		||||
      shortnameEndIndex = i;
 | 
			
		||||
      shortnameStartIndex = i;
 | 
			
		||||
    } else if (!insideTag && (match = trie.search(str.substring(i)))) {
 | 
			
		||||
      const unicodeStr = match;
 | 
			
		||||
      if (unicodeStr in emojione.jsEscapeMap) {
 | 
			
		||||
        const unicode  = emojione.jsEscapeMap[unicodeStr];
 | 
			
		||||
        const short    = mappedUnicode[unicode];
 | 
			
		||||
        const filename = emojione.emojioneList[short].fname;
 | 
			
		||||
        const alt      = emojione.convert(unicode.toUpperCase());
 | 
			
		||||
        const replacement =  `<img draggable="false" class="emojione" alt="${alt}" title="${short}" src="/emoji/${filename}.svg" />`;
 | 
			
		||||
        str = str.substring(0, i) + replacement + str.substring(i + unicodeStr.length);
 | 
			
		||||
        i += (replacement.length - unicodeStr.length); // jump ahead the length we've added to the string
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  return str;
 | 
			
		||||
};
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
export default function emojify(text) {
 | 
			
		||||
  return toImage(text);
 | 
			
		||||
};
 | 
			
		||||
export default emojify;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -102,6 +102,7 @@
 | 
			
		|||
    "sass-loader": "^6.0.6",
 | 
			
		||||
    "stringz": "^0.2.2",
 | 
			
		||||
    "style-loader": "^0.18.2",
 | 
			
		||||
    "substring-trie": "^1.0.0",
 | 
			
		||||
    "throng": "^4.0.0",
 | 
			
		||||
    "tiny-queue": "^0.2.1",
 | 
			
		||||
    "uuid": "^3.1.0",
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -46,4 +46,38 @@ describe('emojify', () => {
 | 
			
		|||
    expect(emojify(':smile')).to.equal(':smile');
 | 
			
		||||
  });
 | 
			
		||||
 | 
			
		||||
  it('does two emoji next to each other', () => {
 | 
			
		||||
    expect(emojify(':smile::wink:')).to.equal(
 | 
			
		||||
      '<img draggable="false" class="emojione" alt="😄" title=":smile:" src="/emoji/1f604.svg" /><img draggable="false" class="emojione" alt="😉" title=":wink:" src="/emoji/1f609.svg" />');
 | 
			
		||||
  });
 | 
			
		||||
 | 
			
		||||
  it('does unicode', () => {
 | 
			
		||||
    expect(emojify('\uD83D\uDC69\u200D\uD83D\uDC69\u200D\uD83D\uDC66\u200D\uD83D\uDC66')).to.equal(
 | 
			
		||||
      '<img draggable="false" class="emojione" alt="👩👩👦👦" title=":family_wwbb:" src="/emoji/1f469-1f469-1f466-1f466.svg" />');
 | 
			
		||||
    expect(emojify('\uD83D\uDC68\uD83D\uDC69\uD83D\uDC67\uD83D\uDC67')).to.equal(
 | 
			
		||||
      '<img draggable="false" class="emojione" alt="👨👩👧👧" title=":family_mwgg:" src="/emoji/1f468-1f469-1f467-1f467.svg" />');
 | 
			
		||||
    expect(emojify('\uD83D\uDC69\uD83D\uDC69\uD83D\uDC66')).to.equal('<img draggable="false" class="emojione" alt="👩👩👦" title=":family_wwb:" src="/emoji/1f469-1f469-1f466.svg" />');
 | 
			
		||||
    expect(emojify('\u2757')).to.equal(
 | 
			
		||||
      '<img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" />');
 | 
			
		||||
  });
 | 
			
		||||
 | 
			
		||||
  it('does multiple unicode', () => {
 | 
			
		||||
    expect(emojify('\u2757 #\uFE0F\u20E3')).to.equal(
 | 
			
		||||
      '<img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" /> <img draggable="false" class="emojione" alt="#️⃣" title=":hash:" src="/emoji/0023-20e3.svg" />');
 | 
			
		||||
    expect(emojify('\u2757#\uFE0F\u20E3')).to.equal(
 | 
			
		||||
      '<img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" /><img draggable="false" class="emojione" alt="#️⃣" title=":hash:" src="/emoji/0023-20e3.svg" />');
 | 
			
		||||
    expect(emojify('\u2757 #\uFE0F\u20E3 \u2757')).to.equal(
 | 
			
		||||
      '<img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" /> <img draggable="false" class="emojione" alt="#️⃣" title=":hash:" src="/emoji/0023-20e3.svg" /> <img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" />');
 | 
			
		||||
    expect(emojify('foo \u2757 #\uFE0F\u20E3 bar')).to.equal(
 | 
			
		||||
      'foo <img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" /> <img draggable="false" class="emojione" alt="#️⃣" title=":hash:" src="/emoji/0023-20e3.svg" /> bar');
 | 
			
		||||
  });
 | 
			
		||||
 | 
			
		||||
  it('does mixed unicode and shortnames', () => {
 | 
			
		||||
    expect(emojify(':smile:#\uFE0F\u20E3:wink:\u2757')).to.equal('<img draggable="false" class="emojione" alt="😄" title=":smile:" src="/emoji/1f604.svg" /><img draggable="false" class="emojione" alt="#️⃣" title=":hash:" src="/emoji/0023-20e3.svg" /><img draggable="false" class="emojione" alt="😉" title=":wink:" src="/emoji/1f609.svg" /><img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" />');
 | 
			
		||||
  });
 | 
			
		||||
 | 
			
		||||
  it('ignores unicode inside of tags', () => {
 | 
			
		||||
    expect(emojify('<p data-foo="\uD83D\uDC69\uD83D\uDC69\uD83D\uDC66"></p>')).to.equal('<p data-foo="\uD83D\uDC69\uD83D\uDC69\uD83D\uDC66"></p>');
 | 
			
		||||
  });
 | 
			
		||||
 | 
			
		||||
});
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -6819,6 +6819,10 @@ style-loader@^0.18.2:
 | 
			
		|||
    loader-utils "^1.0.2"
 | 
			
		||||
    schema-utils "^0.3.0"
 | 
			
		||||
 | 
			
		||||
substring-trie@^1.0.0:
 | 
			
		||||
  version "1.0.0"
 | 
			
		||||
  resolved "https://registry.yarnpkg.com/substring-trie/-/substring-trie-1.0.0.tgz#5a7ecb83aefcca7b3720f7897cf69e97023be143"
 | 
			
		||||
 | 
			
		||||
sugarss@^1.0.0:
 | 
			
		||||
  version "1.0.0"
 | 
			
		||||
  resolved "https://registry.yarnpkg.com/sugarss/-/sugarss-1.0.0.tgz#65e51b3958432fb70d5451a68bb33e32d0cf1ef7"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		
		Reference in a new issue