Faster emojify() by avoiding str.replace() entirely (#4049)
This commit is contained in:
parent
331f0953e9
commit
e282580101
4 changed files with 71 additions and 37 deletions
|
@ -1,60 +1,55 @@
|
||||||
import emojione from 'emojione';
|
import emojione from 'emojione';
|
||||||
|
import Trie from 'substring-trie';
|
||||||
|
|
||||||
const toImage = str => shortnameToImage(unicodeToImage(str));
|
const mappedUnicode = emojione.mapUnicodeToShort();
|
||||||
|
const trie = new Trie(Object.keys(emojione.jsEscapeMap));
|
||||||
|
|
||||||
const unicodeToImage = str => {
|
function emojify(str) {
|
||||||
const mappedUnicode = emojione.mapUnicodeToShort();
|
// This walks through the string from start to end, ignoring any tags (<p>, <br>, etc.)
|
||||||
|
// and replacing valid shortnames like :smile: and :wink: as well as unicode strings
|
||||||
return str.replace(emojione.regUnicode, unicodeChar => {
|
// that _aren't_ within tags with an <img> version.
|
||||||
if (typeof unicodeChar === 'undefined' || unicodeChar === '' || !(unicodeChar in emojione.jsEscapeMap)) {
|
// The goal is to be the same as an emojione.regShortNames/regUnicode replacement, but faster.
|
||||||
return unicodeChar;
|
let i = -1;
|
||||||
}
|
|
||||||
|
|
||||||
const unicode = emojione.jsEscapeMap[unicodeChar];
|
|
||||||
const short = mappedUnicode[unicode];
|
|
||||||
const filename = emojione.emojioneList[short].fname;
|
|
||||||
const alt = emojione.convert(unicode.toUpperCase());
|
|
||||||
|
|
||||||
return `<img draggable="false" class="emojione" alt="${alt}" title="${short}" src="/emoji/${filename}.svg" />`;
|
|
||||||
});
|
|
||||||
};
|
|
||||||
|
|
||||||
const shortnameToImage = str => {
|
|
||||||
// This walks through the string from end to start, ignoring any tags (<p>, <br>, etc.)
|
|
||||||
// and replacing valid shortnames like :smile: and :wink: that _aren't_ within
|
|
||||||
// tags with an <img> version.
|
|
||||||
// The goal is to be the same as an emojione.regShortNames replacement, but faster.
|
|
||||||
// The reason we go backwards is because then we can replace substrings as we go.
|
|
||||||
let i = str.length;
|
|
||||||
let insideTag = false;
|
let insideTag = false;
|
||||||
let insideShortname = false;
|
let insideShortname = false;
|
||||||
let shortnameEndIndex = -1;
|
let shortnameStartIndex = -1;
|
||||||
while (i--) {
|
let match;
|
||||||
|
while (++i < str.length) {
|
||||||
const char = str.charAt(i);
|
const char = str.charAt(i);
|
||||||
if (insideShortname && char === ':') {
|
if (insideShortname && char === ':') {
|
||||||
const shortname = str.substring(i, shortnameEndIndex + 1);
|
const shortname = str.substring(shortnameStartIndex, i + 1);
|
||||||
if (shortname in emojione.emojioneList) {
|
if (shortname in emojione.emojioneList) {
|
||||||
const unicode = emojione.emojioneList[shortname].unicode[emojione.emojioneList[shortname].unicode.length - 1];
|
const unicode = emojione.emojioneList[shortname].unicode[emojione.emojioneList[shortname].unicode.length - 1];
|
||||||
const alt = emojione.convert(unicode.toUpperCase());
|
const alt = emojione.convert(unicode.toUpperCase());
|
||||||
const replacement = `<img draggable="false" class="emojione" alt="${alt}" title="${shortname}" src="/emoji/${unicode}.svg" />`;
|
const replacement = `<img draggable="false" class="emojione" alt="${alt}" title="${shortname}" src="/emoji/${unicode}.svg" />`;
|
||||||
str = str.substring(0, i) + replacement + str.substring(shortnameEndIndex + 1);
|
str = str.substring(0, shortnameStartIndex) + replacement + str.substring(i + 1);
|
||||||
|
i += (replacement.length - shortname.length - 1); // jump ahead the length we've added to the string
|
||||||
} else {
|
} else {
|
||||||
i++; // stray colon, try again
|
i--; // stray colon, try again
|
||||||
}
|
}
|
||||||
insideShortname = false;
|
insideShortname = false;
|
||||||
} else if (insideTag && char === '<') {
|
} else if (insideTag && char === '>') {
|
||||||
insideTag = false;
|
insideTag = false;
|
||||||
} else if (char === '>') {
|
} else if (char === '<') {
|
||||||
insideTag = true;
|
insideTag = true;
|
||||||
insideShortname = false;
|
insideShortname = false;
|
||||||
} else if (!insideTag && char === ':') {
|
} else if (!insideTag && char === ':') {
|
||||||
insideShortname = true;
|
insideShortname = true;
|
||||||
shortnameEndIndex = i;
|
shortnameStartIndex = i;
|
||||||
|
} else if (!insideTag && (match = trie.search(str.substring(i)))) {
|
||||||
|
const unicodeStr = match;
|
||||||
|
if (unicodeStr in emojione.jsEscapeMap) {
|
||||||
|
const unicode = emojione.jsEscapeMap[unicodeStr];
|
||||||
|
const short = mappedUnicode[unicode];
|
||||||
|
const filename = emojione.emojioneList[short].fname;
|
||||||
|
const alt = emojione.convert(unicode.toUpperCase());
|
||||||
|
const replacement = `<img draggable="false" class="emojione" alt="${alt}" title="${short}" src="/emoji/${filename}.svg" />`;
|
||||||
|
str = str.substring(0, i) + replacement + str.substring(i + unicodeStr.length);
|
||||||
|
i += (replacement.length - unicodeStr.length); // jump ahead the length we've added to the string
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return str;
|
return str;
|
||||||
};
|
}
|
||||||
|
|
||||||
export default function emojify(text) {
|
export default emojify;
|
||||||
return toImage(text);
|
|
||||||
};
|
|
||||||
|
|
|
@ -102,6 +102,7 @@
|
||||||
"sass-loader": "^6.0.6",
|
"sass-loader": "^6.0.6",
|
||||||
"stringz": "^0.2.2",
|
"stringz": "^0.2.2",
|
||||||
"style-loader": "^0.18.2",
|
"style-loader": "^0.18.2",
|
||||||
|
"substring-trie": "^1.0.0",
|
||||||
"throng": "^4.0.0",
|
"throng": "^4.0.0",
|
||||||
"tiny-queue": "^0.2.1",
|
"tiny-queue": "^0.2.1",
|
||||||
"uuid": "^3.1.0",
|
"uuid": "^3.1.0",
|
||||||
|
|
|
@ -46,4 +46,38 @@ describe('emojify', () => {
|
||||||
expect(emojify(':smile')).to.equal(':smile');
|
expect(emojify(':smile')).to.equal(':smile');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('does two emoji next to each other', () => {
|
||||||
|
expect(emojify(':smile::wink:')).to.equal(
|
||||||
|
'<img draggable="false" class="emojione" alt="😄" title=":smile:" src="/emoji/1f604.svg" /><img draggable="false" class="emojione" alt="😉" title=":wink:" src="/emoji/1f609.svg" />');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('does unicode', () => {
|
||||||
|
expect(emojify('\uD83D\uDC69\u200D\uD83D\uDC69\u200D\uD83D\uDC66\u200D\uD83D\uDC66')).to.equal(
|
||||||
|
'<img draggable="false" class="emojione" alt="👩👩👦👦" title=":family_wwbb:" src="/emoji/1f469-1f469-1f466-1f466.svg" />');
|
||||||
|
expect(emojify('\uD83D\uDC68\uD83D\uDC69\uD83D\uDC67\uD83D\uDC67')).to.equal(
|
||||||
|
'<img draggable="false" class="emojione" alt="👨👩👧👧" title=":family_mwgg:" src="/emoji/1f468-1f469-1f467-1f467.svg" />');
|
||||||
|
expect(emojify('\uD83D\uDC69\uD83D\uDC69\uD83D\uDC66')).to.equal('<img draggable="false" class="emojione" alt="👩👩👦" title=":family_wwb:" src="/emoji/1f469-1f469-1f466.svg" />');
|
||||||
|
expect(emojify('\u2757')).to.equal(
|
||||||
|
'<img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" />');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('does multiple unicode', () => {
|
||||||
|
expect(emojify('\u2757 #\uFE0F\u20E3')).to.equal(
|
||||||
|
'<img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" /> <img draggable="false" class="emojione" alt="#️⃣" title=":hash:" src="/emoji/0023-20e3.svg" />');
|
||||||
|
expect(emojify('\u2757#\uFE0F\u20E3')).to.equal(
|
||||||
|
'<img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" /><img draggable="false" class="emojione" alt="#️⃣" title=":hash:" src="/emoji/0023-20e3.svg" />');
|
||||||
|
expect(emojify('\u2757 #\uFE0F\u20E3 \u2757')).to.equal(
|
||||||
|
'<img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" /> <img draggable="false" class="emojione" alt="#️⃣" title=":hash:" src="/emoji/0023-20e3.svg" /> <img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" />');
|
||||||
|
expect(emojify('foo \u2757 #\uFE0F\u20E3 bar')).to.equal(
|
||||||
|
'foo <img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" /> <img draggable="false" class="emojione" alt="#️⃣" title=":hash:" src="/emoji/0023-20e3.svg" /> bar');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('does mixed unicode and shortnames', () => {
|
||||||
|
expect(emojify(':smile:#\uFE0F\u20E3:wink:\u2757')).to.equal('<img draggable="false" class="emojione" alt="😄" title=":smile:" src="/emoji/1f604.svg" /><img draggable="false" class="emojione" alt="#️⃣" title=":hash:" src="/emoji/0023-20e3.svg" /><img draggable="false" class="emojione" alt="😉" title=":wink:" src="/emoji/1f609.svg" /><img draggable="false" class="emojione" alt="❗" title=":exclamation:" src="/emoji/2757.svg" />');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('ignores unicode inside of tags', () => {
|
||||||
|
expect(emojify('<p data-foo="\uD83D\uDC69\uD83D\uDC69\uD83D\uDC66"></p>')).to.equal('<p data-foo="\uD83D\uDC69\uD83D\uDC69\uD83D\uDC66"></p>');
|
||||||
|
});
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|
|
@ -6819,6 +6819,10 @@ style-loader@^0.18.2:
|
||||||
loader-utils "^1.0.2"
|
loader-utils "^1.0.2"
|
||||||
schema-utils "^0.3.0"
|
schema-utils "^0.3.0"
|
||||||
|
|
||||||
|
substring-trie@^1.0.0:
|
||||||
|
version "1.0.0"
|
||||||
|
resolved "https://registry.yarnpkg.com/substring-trie/-/substring-trie-1.0.0.tgz#5a7ecb83aefcca7b3720f7897cf69e97023be143"
|
||||||
|
|
||||||
sugarss@^1.0.0:
|
sugarss@^1.0.0:
|
||||||
version "1.0.0"
|
version "1.0.0"
|
||||||
resolved "https://registry.yarnpkg.com/sugarss/-/sugarss-1.0.0.tgz#65e51b3958432fb70d5451a68bb33e32d0cf1ef7"
|
resolved "https://registry.yarnpkg.com/sugarss/-/sugarss-1.0.0.tgz#65e51b3958432fb70d5451a68bb33e32d0cf1ef7"
|
||||||
|
|
Loading…
Reference in a new issue