Compress and combine emoji data (#5229)

This commit is contained in:
Nolan Lawson 2017-10-05 18:42:34 -07:00 committed by Eugen Rochko
parent eb5ac23434
commit fd7f0732fe
22 changed files with 254 additions and 93 deletions

View File

@ -1,6 +1,6 @@
import api from '../api';
import { throttle } from 'lodash';
import { search as emojiSearch } from '../emoji_index_light';
import { search as emojiSearch } from '../features/emoji/emoji_mart_search_light';
import {
updateTimeline,

View File

@ -1,6 +1,6 @@
import React from 'react';
import PropTypes from 'prop-types';
import { unicodeMapping } from '../emojione_light';
import unicodeMapping from '../features/emoji/emoji_unicode_mapping_light';
const assetHost = process.env.CDN_HOST || '';
@ -23,7 +23,7 @@ export default class AutosuggestEmoji extends React.PureComponent {
return null;
}
url = `${assetHost}/emoji/${mapping[0]}.svg`;
url = `${assetHost}/emoji/${mapping.filename}.svg`;
}
return (

View File

@ -1,22 +0,0 @@
// @preval
const data = require('emoji-mart/dist/data').default;
const pick = require('lodash/pick');
const values = require('lodash/values');
const condensedEmojis = Object.keys(data.emojis).map(key => {
if (!data.emojis[key].short_names[0] === key) {
throw new Error('The condenser expects the first short_code to be the ' +
'key. It may need to be rewritten if the emoji change such that this ' +
'is no longer the case.');
}
return values(pick(data.emojis[key], ['short_names', 'unified', 'search']));
});
// JSON.parse/stringify is to emulate what @preval is doing and avoid any
// inconsistent behavior in dev mode
module.exports = JSON.parse(JSON.stringify({
emojis: condensedEmojis,
skins: data.skins,
categories: data.categories,
short_names: data.short_names,
}));

View File

@ -1,16 +0,0 @@
// Expands the condensed emoji list back into an object keyed by each
// emoji's first short name, then re-exports the same data object.
const data = require('./emoji_data_compressed');

// decompress: every entry is a [short_names, unified, search] tuple
data.emojis = data.emojis.reduce((byShortName, [ short_names, unified, search ]) => {
  byShortName[short_names[0]] = { short_names, unified, search };
  return byShortName;
}, {});

module.exports = data;

View File

@ -1,38 +0,0 @@
// @preval
// http://www.unicode.org/Public/emoji/5.0/emoji-test.txt
// Turns every value of emoji_map.json into a [filename, shortcode] pair,
// where the shortcode is looked up through emoji-mart's emojiIndex.
const emojis = require('./emoji_map.json');
const { emojiIndex } = require('emoji-mart');

const excluded = ['®', '©', '™'];
const skins = ['🏻', '🏼', '🏽', '🏾', '🏿'];

// native unicode string -> emoji-mart id
const shortcodeMap = Object.keys(emojiIndex.emojis).reduce((lookup, id) => {
  const emoji = emojiIndex.emojis[id];
  lookup[emoji.native] = emoji.id;
  return lookup;
}, {});

// Drops the first occurrence of each skin tone modifier from the string.
const stripModifiers = unicode =>
  skins.reduce((text, tone) => text.replace(tone, ''), unicode);

Object.keys(emojis).forEach(unicode => {
  if (excluded.includes(unicode)) {
    delete emojis[unicode];
  } else {
    const base = stripModifiers(unicode);
    // fall back to the variant that carries a trailing variation selector
    const shortcode = shortcodeMap[base] || shortcodeMap[base + '\uFE0F'];
    emojis[unicode] = [emojis[unicode], shortcode];
  }
});

module.exports.unicodeMapping = emojis;

View File

@ -6,7 +6,7 @@ import Overlay from 'react-overlays/lib/Overlay';
import classNames from 'classnames';
import ImmutablePropTypes from 'react-immutable-proptypes';
import detectPassiveEvents from 'detect-passive-events';
import { buildCustomEmojis } from '../../../emoji';
import { buildCustomEmojis } from '../../emoji/emoji';
const messages = defineMessages({
emoji: { id: 'emoji_button.label', defaultMessage: 'Insert emoji' },

View File

@ -1,4 +1,4 @@
import { unicodeMapping } from './emojione_light';
import unicodeMapping from './emoji_unicode_mapping_light';
import Trie from 'substring-trie';
const trie = new Trie(Object.keys(unicodeMapping));
@ -35,8 +35,9 @@ const emojify = (str, customEmojis = {}) => {
if (!rend) break;
i = rend;
} else { // matched to unicode emoji
const [filename, shortCode] = unicodeMapping[match];
replacement = `<img draggable="false" class="emojione" alt="${match}" title=":${shortCode}:" src="${assetHost}/emoji/${filename}.svg" />`;
const { filename, shortCode } = unicodeMapping[match];
const title = shortCode ? `:${shortCode}:` : '';
replacement = `<img draggable="false" class="emojione" alt="${match}" title="${title}" src="${assetHost}/emoji/${filename}.svg" />`;
rend = i + match.length;
}
rtn += str.slice(0, i) + replacement;

View File

@ -0,0 +1,90 @@
// @preval
// http://www.unicode.org/Public/emoji/5.0/emoji-test.txt
// This file contains the compressed version of the emoji data from
// both emoji_map.json and from emoji-mart's emojiIndex and data objects.
// It's designed to be emitted in an array format to take up less space
// over the wire.
const { unicodeToFilename } = require('./unicode_to_filename');
const { unicodeToUnifiedName } = require('./unicode_to_unified_name');
const emojiMap = require('./emoji_map.json');
const { emojiIndex } = require('emoji-mart');
const emojiMartData = require('emoji-mart/dist/data').default;
const excluded = ['®', '©', '™'];
const skins = ['🏻', '🏼', '🏽', '🏾', '🏿'];
const shortcodeMap = {};
const shortCodesToEmojiData = {};
const emojisWithoutShortCodes = [];
Object.keys(emojiIndex.emojis).forEach(key => {
shortcodeMap[emojiIndex.emojis[key].native] = emojiIndex.emojis[key].id;
});
const stripModifiers = unicode => {
skins.forEach(tone => {
unicode = unicode.replace(tone, '');
});
return unicode;
};
Object.keys(emojiMap).forEach(key => {
if (excluded.includes(key)) {
delete emojiMap[key];
return;
}
const normalizedKey = stripModifiers(key);
let shortcode = shortcodeMap[normalizedKey];
if (!shortcode) {
shortcode = shortcodeMap[normalizedKey + '\uFE0F'];
}
const filename = emojiMap[key];
const filenameData = [key];
if (unicodeToFilename(key) !== filename) {
// filename can't be derived using unicodeToFilename
filenameData.push(filename);
}
if (typeof shortcode === 'undefined') {
emojisWithoutShortCodes.push(filenameData);
} else {
shortCodesToEmojiData[shortcode] = shortCodesToEmojiData[shortcode] || [[]];
shortCodesToEmojiData[shortcode][0].push(filenameData);
}
});
Object.keys(emojiIndex.emojis).forEach(key => {
const { native } = emojiIndex.emojis[key];
const { short_names, search, unified } = emojiMartData.emojis[key];
if (short_names[0] !== key) {
throw new Error('The compresser expects the first short_code to be the ' +
'key. It may need to be rewritten if the emoji change such that this ' +
'is no longer the case.');
}
short_names.splice(0, 1); // first short name can be inferred from the key
const searchData = [native, short_names, search];
if (unicodeToUnifiedName(native) !== unified) {
// unified name can't be derived from unicodeToUnifiedName
searchData.push(unified);
}
shortCodesToEmojiData[key].push(searchData);
});
// JSON.parse/stringify is to emulate what @preval is doing and avoid any
// inconsistent behavior in dev mode
module.exports = JSON.parse(JSON.stringify([
shortCodesToEmojiData,
emojiMartData.skins,
emojiMartData.categories,
emojiMartData.short_names,
emojisWithoutShortCodes,
]));

View File

@ -0,0 +1,41 @@
// The output of this module is designed to mimic emoji-mart's
// "data" object, such that we can use it for a light version of emoji-mart's
// emojiIndex.search functionality.
const { unicodeToUnifiedName } = require('./unicode_to_unified_name');
const [ shortCodesToEmojiData, skins, categories, short_names ] = require('./emoji_compressed');
const emojis = {};
// decompress
Object.keys(shortCodesToEmojiData).forEach((shortCode) => {
let [
filenameData, // eslint-disable-line no-unused-vars
searchData,
] = shortCodesToEmojiData[shortCode];
let [
native,
short_names,
search,
unified,
] = searchData;
if (!unified) {
// unified name can be derived from unicodeToUnifiedName
unified = unicodeToUnifiedName(native);
}
short_names = [shortCode].concat(short_names);
emojis[shortCode] = {
native,
search,
short_names,
unified,
};
});
module.exports = {
emojis,
skins,
categories,
short_names,
};

View File

@ -1,7 +1,7 @@
// This code is largely borrowed from:
// https://github.com/missive/emoji-mart/blob/bbd4fbe/src/utils/emoji-index.js
import data from './emoji_data_light';
import data from './emoji_mart_data_light';
import { getData, getSanitizedData, intersect } from './emoji_utils';
let index = {};

View File

@ -0,0 +1,35 @@
// A mapping of unicode strings to an object containing the filename
// (i.e. the svg filename) and a shortCode intended to be shown
// as a "title" attribute in an HTML element (aka tooltip).
const compressed = require('./emoji_compressed');
const { unicodeToFilename } = require('./unicode_to_filename');

// Only the first and the last slot of the compressed array are needed here.
const shortCodesToEmojiData = compressed[0];
const emojisWithoutShortCodes = compressed[4];

// decompress
const unicodeMapping = {};

// Registers a single [native, filename?] entry under its native string.
// `shortCode` stays undefined for emojis that have none.
function processEmojiMapData(emojiMapData, shortCode) {
  const native = emojiMapData[0];
  // when no filename is stored, it is derived with unicodeToFilename
  const filename = emojiMapData[1] || unicodeToFilename(native);
  unicodeMapping[native] = { shortCode, filename };
}

Object.keys(shortCodesToEmojiData).forEach((shortCode) => {
  const filenameData = shortCodesToEmojiData[shortCode][0];
  filenameData.forEach((emojiMapData) => processEmojiMapData(emojiMapData, shortCode));
});

emojisWithoutShortCodes.forEach((emojiMapData) => processEmojiMapData(emojiMapData));

module.exports = unicodeMapping;

View File

@ -1,7 +1,7 @@
// This code is largely borrowed from:
// https://github.com/missive/emoji-mart/blob/bbd4fbe/src/utils/index.js
import data from './emoji_data_light';
import data from './emoji_mart_data_light';
const COLONS_REGEX = /^(?:\:([^\:]+)\:)(?:\:skin-tone-(\d)\:)?$/;

View File

@ -0,0 +1,26 @@
// taken from:
// https://github.com/twitter/twemoji/blob/47732c7/twemoji-generator.js#L848-L866
exports.unicodeToFilename = (str) => {
let result = '';
let charCode = 0;
let p = 0;
let i = 0;
while (i < str.length) {
charCode = str.charCodeAt(i++);
if (p) {
if (result.length > 0) {
result += '-';
}
result += (0x10000 + ((p - 0xD800) << 10) + (charCode - 0xDC00)).toString(16);
p = 0;
} else if (0xD800 <= charCode && charCode <= 0xDBFF) {
p = charCode;
} else {
if (result.length > 0) {
result += '-';
}
result += charCode.toString(16);
}
}
return result;
};

View File

@ -0,0 +1,17 @@
function padLeft(str, num) {
while (str.length < num) {
str = '0' + str;
}
return str;
}
exports.unicodeToUnifiedName = (str) => {
let output = '';
for (let i = 0; i < str.length; i += 2) {
if (i > 0) {
output += '-';
}
output += padLeft(str.codePointAt(i).toString(16).toUpperCase(), 4);
}
return output;
};

View File

@ -44,7 +44,7 @@ import {
FAVOURITED_STATUSES_EXPAND_SUCCESS,
} from '../actions/favourites';
import { STORE_HYDRATE } from '../actions/store';
import emojify from '../emoji';
import emojify from '../features/emoji/emoji';
import { Map as ImmutableMap, fromJS } from 'immutable';
import escapeTextContentForBrowser from 'escape-html';

View File

@ -1,7 +1,7 @@
import { List as ImmutableList } from 'immutable';
import { STORE_HYDRATE } from '../actions/store';
import { search as emojiSearch } from '../emoji_index_light';
import { buildCustomEmojis } from '../emoji';
import { search as emojiSearch } from '../features/emoji/emoji_mart_search_light';
import { buildCustomEmojis } from '../features/emoji/emoji';
const initialState = ImmutableList();

View File

@ -39,7 +39,7 @@ import {
PINNED_STATUSES_FETCH_SUCCESS,
} from '../actions/pin_statuses';
import { SEARCH_FETCH_SUCCESS } from '../actions/search';
import emojify from '../emoji';
import emojify from '../features/emoji/emoji';
import { Map as ImmutableMap, fromJS } from 'immutable';
import escapeTextContentForBrowser from 'escape-html';

View File

@ -21,7 +21,7 @@ function main() {
const { length } = require('stringz');
const IntlRelativeFormat = require('intl-relativeformat').default;
const { delegate } = require('rails-ujs');
const emojify = require('../mastodon/emoji').default;
const emojify = require('../mastodon/features/emoji/emoji').default;
const { getLocale } = require('../mastodon/locales');
const { localeData } = getLocale();
const VideoContainer = require('../mastodon/containers/video_container').default;

View File

@ -17,7 +17,7 @@ namespace :emojis do
task :generate do
source = 'http://www.unicode.org/Public/emoji/5.0/emoji-test.txt'
codes = []
dest = Rails.root.join('app', 'javascript', 'mastodon', 'emoji_map.json')
dest = Rails.root.join('app', 'javascript', 'mastodon', 'features', 'emoji', 'emoji_map.json')
puts "Downloading emojos from source... (#{source})"

View File

@ -1,5 +1,5 @@
import { expect } from 'chai';
import { search } from '../../../app/javascript/mastodon/emoji_index_light';
import { search } from '../../../app/javascript/mastodon/features/emoji/emoji_mart_search_light';
import { emojiIndex } from 'emoji-mart';
import { pick } from 'lodash';
@ -78,4 +78,22 @@ describe('emoji_index', () => {
expect(emojiIndex.search('flag', { include: ['people'] }))
.to.deep.equal([]);
});
it('does an emoji whose unified name is irregular', () => {
  // The light search and emoji-mart's own search are both expected to
  // return exactly these three entries, in this order.
  const waterPoloEmojis = [
    { id: 'water_polo', unified: '1f93d', native: '🤽' },
    { id: 'man-playing-water-polo', unified: '1f93d-200d-2642-fe0f', native: '🤽‍♂️' },
    { id: 'woman-playing-water-polo', unified: '1f93d-200d-2640-fe0f', native: '🤽‍♀️' },
  ];

  const lightResults = search('polo').map(trimEmojis);
  expect(lightResults).to.deep.equal(waterPoloEmojis);

  const emojiMartResults = emojiIndex.search('polo').map(trimEmojis);
  expect(emojiMartResults).to.deep.equal(waterPoloEmojis);
});
});

View File

@ -1,5 +1,5 @@
import { expect } from 'chai';
import emojify from '../../../app/javascript/mastodon/emoji';
import emojify from '../../../app/javascript/mastodon/features/emoji/emoji';
describe('emojify', () => {
it('ignores unknown shortcodes', () => {
@ -49,4 +49,13 @@ describe('emojify', () => {
expect(emojify('👌🌈💕')).to.equal('<img draggable="false" class="emojione" alt="👌" title=":ok_hand:" src="/emoji/1f44c.svg" /><img draggable="false" class="emojione" alt="🌈" title=":rainbow:" src="/emoji/1f308.svg" /><img draggable="false" class="emojione" alt="💕" title=":two_hearts:" src="/emoji/1f495.svg" />');
expect(emojify('👌 🌈 💕')).to.equal('<img draggable="false" class="emojione" alt="👌" title=":ok_hand:" src="/emoji/1f44c.svg" /> <img draggable="false" class="emojione" alt="🌈" title=":rainbow:" src="/emoji/1f308.svg" /> <img draggable="false" class="emojione" alt="💕" title=":two_hearts:" src="/emoji/1f495.svg" />');
});
it('does an emoji that has no shortcode', () => {
  // without a shortcode the title attribute is rendered empty
  const rendered = emojify('🕉️');
  expect(rendered).to.equal('<img draggable="false" class="emojione" alt="🕉️" title="" src="/emoji/1f549.svg" />');
});
it('does an emoji whose filename is irregular', () => {
  // the svg filename (2199) differs from the full code point sequence of the input
  const rendered = emojify('↙️');
  expect(rendered).to.equal('<img draggable="false" class="emojione" alt="↙️" title=":arrow_lower_left:" src="/emoji/2199.svg" />');
});
});