Faster emojify() by avoiding str.replace() entirely (#4049)
This commit is contained in:
		
							parent
							
								
									331f0953e9
								
							
						
					
					
						commit
						e282580101
					
				
					 4 changed files with 71 additions and 37 deletions
				
			
		|  | @ -1,60 +1,55 @@ | |||
| import emojione from 'emojione'; | ||||
| import Trie from 'substring-trie'; | ||||
| 
 | ||||
// toImage composes the two passes: unicodeToImage runs first, then shortnameToImage on its output.
| const toImage = str => shortnameToImage(unicodeToImage(str)); | ||||
// Result of emojione.mapUnicodeToShort(), evaluated at module scope; the scanner indexes it with
// a value taken from emojione.jsEscapeMap to obtain the matching shortname.
| const mappedUnicode = emojione.mapUnicodeToShort(); | ||||
// Trie built from the keys of emojione.jsEscapeMap; trie.search() is called on the remainder of the
// string to detect a unicode emoji at the current scan position.
// NOTE(review): presumably search() returns the key that prefixes its argument - confirm against substring-trie.
| const trie = new Trie(Object.keys(emojione.jsEscapeMap)); | ||||
| 
 | ||||
// unicodeToImage(str): runs str.replace() with emojione.regUnicode and swaps each match that is a
// key of emojione.jsEscapeMap for an <img> tag; undefined, empty or unknown matches are returned unchanged.
// NOTE(review): whether every occurrence is replaced depends on regUnicode being a global regex - not visible here.
| const unicodeToImage = str => { | ||||
// The unicode-to-shortname map is rebuilt on every call of this function.
|   const mappedUnicode = emojione.mapUnicodeToShort(); | ||||
| 
 | ||||
|   return str.replace(emojione.regUnicode, unicodeChar => { | ||||
|     if (typeof unicodeChar === 'undefined' || unicodeChar === '' || !(unicodeChar in emojione.jsEscapeMap)) { | ||||
|       return unicodeChar; | ||||
|     } | ||||
| 
 | ||||
// Lookup chain: matched text -> jsEscapeMap value -> shortname -> fname (used for the .svg path);
// the alt text is emojione.convert() applied to the upper-cased jsEscapeMap value.
|     const unicode  = emojione.jsEscapeMap[unicodeChar]; | ||||
|     const short    = mappedUnicode[unicode]; | ||||
|     const filename = emojione.emojioneList[short].fname; | ||||
|     const alt      = emojione.convert(unicode.toUpperCase()); | ||||
| 
 | ||||
|     return `<img draggable="false" class="emojione" alt="${alt}" title="${short}" src="/emoji/${filename}.svg" />`; | ||||
|   }); | ||||
| }; | ||||
| 
 | ||||
// NOTE(review): this region interleaves rows of two scanners with no +/- markers. One walks backward
// (shortnameToImage: `let i = str.length`, `while (i--)`, shortnameEndIndex); the other walks forward
// (emojify: `let i = -1`, `while (++i < str.length)`, shortnameStartIndex, plus the trie branch).
// Paired rows such as the two `const shortname = ...` lines are alternatives, not sequential statements.
| const shortnameToImage = str => { | ||||
|   // This walks through the string from end to start, ignoring any tags (<p>, <br>, etc.)
 | ||||
|   // and replacing valid shortnames like :smile: and :wink: that _aren't_ within
 | ||||
|   // tags with an <img> version.
 | ||||
|   // The goal is to be the same as an emojione.regShortNames replacement, but faster.
 | ||||
|   // The reason we go backwards is because then we can replace substrings as we go.
 | ||||
|   let i = str.length; | ||||
| function emojify(str) { | ||||
|   // This walks through the string from start to end, ignoring any tags (<p>, <br>, etc.)
 | ||||
|   // and replacing valid shortnames like :smile: and :wink: as well as unicode strings
 | ||||
|   // that _aren't_ within tags with an <img> version.
 | ||||
|   // The goal is to be the same as an emojione.regShortNames/regUnicode replacement, but faster.
 | ||||
|   let i = -1; | ||||
|   let insideTag = false; | ||||
|   let insideShortname = false; | ||||
|   let shortnameEndIndex = -1; | ||||
|   while (i--) { | ||||
|   let shortnameStartIndex = -1; | ||||
|   let match; | ||||
|   while (++i < str.length) { | ||||
|     const char = str.charAt(i); | ||||
// A second colon while insideShortname closes a candidate; it is replaced only if it is a key of emojione.emojioneList.
|     if (insideShortname && char === ':') { | ||||
|       const shortname = str.substring(i, shortnameEndIndex + 1); | ||||
|       const shortname = str.substring(shortnameStartIndex, i + 1); | ||||
|       if (shortname in emojione.emojioneList) { | ||||
// The last entry of the shortname's unicode array is used both for the alt text and for the .svg file name.
|         const unicode = emojione.emojioneList[shortname].unicode[emojione.emojioneList[shortname].unicode.length - 1]; | ||||
|         const alt = emojione.convert(unicode.toUpperCase()); | ||||
|         const replacement = `<img draggable="false" class="emojione" alt="${alt}" title="${shortname}" src="/emoji/${unicode}.svg" />`; | ||||
|         str = str.substring(0, i) + replacement + str.substring(shortnameEndIndex + 1); | ||||
|         str = str.substring(0, shortnameStartIndex) + replacement + str.substring(i + 1); | ||||
|         i += (replacement.length - shortname.length - 1); // forward walk: i lands on the second-to-last character of the inserted <img>, so the next ++i re-reads its final '>'. NOTE(review): dropping the "- 1" would skip the whole insertion - confirm
 | ||||
|       } else { | ||||
|         i++; // stray colon, try again (backward walk): i++ cancels the loop's i--, so this colon is re-read as the possible end of an earlier shortname
 | ||||
|         i--; // stray colon, try again (forward walk): i-- cancels the loop's ++i, so this colon is re-read as the possible start of a later shortname
 | ||||
|       } | ||||
|       insideShortname = false; | ||||
// Tag tracking: the backward walk enters a tag on '>' and leaves it on '<'; the forward walk does the opposite.
|     } else if (insideTag && char === '<') { | ||||
|     } else if (insideTag && char === '>') { | ||||
|       insideTag = false; | ||||
|     } else if (char === '>') { | ||||
|     } else if (char === '<') { | ||||
|       insideTag = true; | ||||
|       insideShortname = false; | ||||
|     } else if (!insideTag && char === ':') { | ||||
|       insideShortname = true; | ||||
|       shortnameEndIndex = i; | ||||
|       shortnameStartIndex = i; | ||||
// Forward walk only: outside tags, ask the trie whether the rest of the string begins with a known unicode emoji.
|     } else if (!insideTag && (match = trie.search(str.substring(i)))) { | ||||
|       const unicodeStr = match; | ||||
|       if (unicodeStr in emojione.jsEscapeMap) { | ||||
|         const unicode  = emojione.jsEscapeMap[unicodeStr]; | ||||
|         const short    = mappedUnicode[unicode]; | ||||
|         const filename = emojione.emojioneList[short].fname; | ||||
|         const alt      = emojione.convert(unicode.toUpperCase()); | ||||
|         const replacement =  `<img draggable="false" class="emojione" alt="${alt}" title="${short}" src="/emoji/${filename}.svg" />`; | ||||
|         str = str.substring(0, i) + replacement + str.substring(i + unicodeStr.length); | ||||
|         i += (replacement.length - unicodeStr.length); // advance by the net growth of the string; the last (unicodeStr.length - 1) characters of the inserted <img> are scanned again. NOTE(review): replacement.length - 1 would skip the whole insertion - confirm
 | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   return str; | ||||
| }; | ||||
| } | ||||
| 
 | ||||
// Two alternative default exports appear here: a wrapper named emojify that forwards its argument to
// toImage(text), and a direct export of an emojify binding declared elsewhere in the module.
// NOTE(review): presumably the wrapper is the pre-change row and the direct export the post-change row - the page shows no +/- markers.
| export default function emojify(text) { | ||||
|   return toImage(text); | ||||
| }; | ||||
| export default emojify; | ||||
|  |  | |||
|  | @ -102,6 +102,7 @@ | |||
|     "sass-loader": "^6.0.6", | ||||
|     "stringz": "^0.2.2", | ||||
|     "style-loader": "^0.18.2", | ||||
|     "substring-trie": "^1.0.0", | ||||
|     "throng": "^4.0.0", | ||||
|     "tiny-queue": "^0.2.1", | ||||
|     "uuid": "^3.1.0", | ||||
|  |  | |||
|  | @ -46,4 +46,38 @@ describe('emojify', () => { | |||
|     expect(emojify(':smile')).to.equal(':smile'); | ||||
|   }); | ||||
| 
 | ||||
|   it('does two emoji next to each other', () => { | ||||
|     expect(emojify(':smile::wink:')).to.equal( | ||||
|       '<img draggable="false" class="emojione" alt="😄" title=":smile:" src="/emoji/1f604.svg" /><img draggable="false" class="emojione" alt="😉" title=":wink:" src="/emoji/1f609.svg" />'); | ||||
|   }); | ||||
| 
 | ||||
|   it('does unicode', () => { | ||||
|     expect(emojify('\uD83D\uDC69\u200D\uD83D\uDC69\u200D\uD83D\uDC66\u200D\uD83D\uDC66')).to.equal( | ||||
|       '<img draggable="false" class="emojione" alt="👩👩👦👦" title=":family_wwbb:" src="/emoji/1f469-1f469-1f466-1f466.svg" />'); | ||||
|     expect(emojify('\uD83D\uDC68\uD83D\uDC69\uD83D\uDC67\uD83D\uDC67')).to.equal( | ||||
|       '<img draggable="false" class="emojione" alt="👨👩👧👧" title=":family_mwgg:" src="/emoji/1f468-1f469-1f467-1f467.svg" />'); | ||||
|     expect(emojify('\uD83D\uDC69\uD83D\uDC69\uD83D\uDC66')).to.equal('<img draggable="false" class="emojione" alt="👩👩👦" title=":family_wwb:" src="/emoji/1f469-1f469-1f466.svg" />'); | ||||
|     expect(emojify('\u2757')).to.equal( | ||||
|       '<img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" />'); | ||||
|   }); | ||||
| 
 | ||||
|   it('does multiple unicode', () => { | ||||
|     expect(emojify('\u2757 #\uFE0F\u20E3')).to.equal( | ||||
|       '<img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" /> <img draggable="false" class="emojione" alt="#️⃣" title=":hash:" src="/emoji/0023-20e3.svg" />'); | ||||
|     expect(emojify('\u2757#\uFE0F\u20E3')).to.equal( | ||||
|       '<img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" /><img draggable="false" class="emojione" alt="#️⃣" title=":hash:" src="/emoji/0023-20e3.svg" />'); | ||||
|     expect(emojify('\u2757 #\uFE0F\u20E3 \u2757')).to.equal( | ||||
|       '<img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" /> <img draggable="false" class="emojione" alt="#️⃣" title=":hash:" src="/emoji/0023-20e3.svg" /> <img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" />'); | ||||
|     expect(emojify('foo \u2757 #\uFE0F\u20E3 bar')).to.equal( | ||||
|       'foo <img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" /> <img draggable="false" class="emojione" alt="#️⃣" title=":hash:" src="/emoji/0023-20e3.svg" /> bar'); | ||||
|   }); | ||||
| 
 | ||||
|   it('does mixed unicode and shortnames', () => { | ||||
|     expect(emojify(':smile:#\uFE0F\u20E3:wink:\u2757')).to.equal('<img draggable="false" class="emojione" alt="😄" title=":smile:" src="/emoji/1f604.svg" /><img draggable="false" class="emojione" alt="#️⃣" title=":hash:" src="/emoji/0023-20e3.svg" /><img draggable="false" class="emojione" alt="😉" title=":wink:" src="/emoji/1f609.svg" /><img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" />'); | ||||
|   }); | ||||
| 
 | ||||
|   it('ignores unicode inside of tags', () => { | ||||
|     expect(emojify('<p data-foo="\uD83D\uDC69\uD83D\uDC69\uD83D\uDC66"></p>')).to.equal('<p data-foo="\uD83D\uDC69\uD83D\uDC69\uD83D\uDC66"></p>'); | ||||
|   }); | ||||
| 
 | ||||
| }); | ||||
|  |  | |||
|  | @ -6819,6 +6819,10 @@ style-loader@^0.18.2: | |||
|     loader-utils "^1.0.2" | ||||
|     schema-utils "^0.3.0" | ||||
| 
 | ||||
| substring-trie@^1.0.0: | ||||
|   version "1.0.0" | ||||
|   resolved "https://registry.yarnpkg.com/substring-trie/-/substring-trie-1.0.0.tgz#5a7ecb83aefcca7b3720f7897cf69e97023be143" | ||||
| 
 | ||||
| sugarss@^1.0.0: | ||||
|   version "1.0.0" | ||||
|   resolved "https://registry.yarnpkg.com/sugarss/-/sugarss-1.0.0.tgz#65e51b3958432fb70d5451a68bb33e32d0cf1ef7" | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue