Update emoji list

Update emoji list to include all Unicode 14.0 emojis. GitHub shortcodes
are used, with fallback to Unicode CLDR annotations.
This commit is contained in:
yan 2023-02-01 21:59:59 +02:00
parent d5b078e084
commit 4a0974929b
2 changed files with 2570 additions and 679 deletions

File diff suppressed because it is too large Load Diff

67
tools/build_emojis.js Normal file
View File

@ -0,0 +1,67 @@
// Builds static/emoji.json
// GitHub shortcodes are used, with fallback to Unicode CLDR annotations
//
// curl -o gh-emojis.json https://api.github.com/emojis
// curl -o emoji-test.txt https://unicode.org/Public/emoji/14.0/emoji-test.txt
//
const fs = require('fs')
// Captures the hyphen-separated hex code points from a GitHub emoji image URL
const GH_REGEX = /unicode\/(.*)\.png/
// Matches a data line of emoji-test.txt. Capture groups used by this script:
//   [1] space-separated hex code points
//   [3] the status field after ';' (e.g. 'fully-qualified')
//   [4] the text following the two tokens after '#', used as the emoji name
const CLDR_REGEX = /^(([0-9A-F]+ )+)\s*; ([^\s]+)\s+# [^ ]+ [^ ]+ (.*)/
const emojiFilename = '../static/emoji.json'
const ghEmojis = JSON.parse(fs.readFileSync('gh-emojis.json', 'utf8'))

// Reverse lookup table: emoji character -> GitHub shortcode.
// Entries whose URL does not match GH_REGEX are skipped. When several
// shortcodes map to the same character, the one iterated last wins.
const ghEmojiNames = {}
for (const [shortcode, url] of Object.entries(ghEmojis ?? {})) {
  const found = GH_REGEX.exec(url)
  if (!found) continue
  const codepoints = found[1].split('-').map(hex => Number.parseInt(hex, 16))
  const emojiChar = String.fromCodePoint(...codepoints)
  ghEmojiNames[emojiChar] = shortcode
}
/**
 * Reports whether a code point sequence contains a skin tone modifier,
 * i.e. any code point in the range U+1F3FB..U+1F3FF (inclusive).
 *
 * @param {number[]} codepoints - numeric code points of one emoji sequence
 * @returns {boolean} true if at least one code point lies in that range
 */
function hasSkinToneIndicator(codepoints) {
  for (const codepoint of codepoints) {
    if (codepoint >= 0x1F3FB && codepoint <= 0x1F3FF) {
      return true
    }
  }
  return false
}
/**
 * Converts a CLDR emoji name into a shortcode-style slug.
 *
 * Accented letters are reduced to their base letter first (NFD decomposition,
 * then combining marks are dropped), so "piñata" becomes "pinata" instead of
 * "pi_ata". Every remaining run of characters other than ASCII letters,
 * digits, '#' and '*' becomes a single '_', and leading/trailing underscores
 * are removed so names ending in punctuation do not yield "..._".
 *
 * @param {string} name - CLDR name as captured from emoji-test.txt
 * @returns {string} lower-case slug
 */
function cldrNameToSlug(name) {
  return name
    .normalize('NFD')
    .replaceAll(/\p{M}+/gu, '')
    .replaceAll(/[^A-Za-z0-9#*]+/g, '_')
    .replaceAll(/^_+|_+$/g, '')
    .toLowerCase()
}

/**
 * Looks up the GitHub shortcode for an emoji.
 *
 * The exact sequence is tried first. GitHub image URLs commonly omit the
 * U+FE0F variation selector that fully-qualified sequences carry, so the
 * sequence with every U+FE0F removed is tried as a second candidate.
 *
 * @param {string} char - fully-qualified emoji sequence
 * @returns {string|undefined} GitHub shortcode, or undefined if there is none
 */
function findGhEmojiName(char) {
  return ghEmojiNames[char] ?? ghEmojiNames[char.replaceAll('\uFE0F', '')]
}

// Collect one { keyword, value } entry per fully-qualified emoji listed in
// emoji-test.txt. Sequences containing a skin tone modifier are left out.
// The keyword is the GitHub shortcode when one exists, otherwise a slug
// derived from the CLDR name.
const emojis = []
const cldrDataRaw = fs.readFileSync('./emoji-test.txt', 'utf-8')
for (const line of cldrDataRaw.split('\n')) {
  const match = CLDR_REGEX.exec(line)
  // Comment lines, group headers and blank lines do not match
  if (!match) continue
  if (match[3] !== 'fully-qualified') continue

  const codepoints = match[1].trim().split(' ').map(cp => Number.parseInt(cp, 16))
  if (hasSkinToneIndicator(codepoints)) continue

  const char = String.fromCodePoint(...codepoints)
  emojis.push({
    keyword: findGhEmojiName(char) ?? cldrNameToSlug(match[4]),
    value: char,
  })
}
// Sort the list in place by emoji character. The 'co-emoji' extension on the
// locale tag requests the emoji collation order from Intl.Collator.
const collator = new Intl.Collator('en-u-co-emoji')
emojis.sort(({ value: left }, { value: right }) => collator.compare(left, right))
// Append the 26 regional indicator symbols (U+1F1E6..U+1F1FF) with the
// keywords regional_indicator_a .. regional_indicator_z.
// They are appended only after the sort on purpose: Unicode collation would
// mix them in between the country flags, which looks ugly.
for (let offset = 0; offset < 26; offset++) {
  // 97 is the code point of 'a'
  const letter = String.fromCodePoint(97 + offset)
  const indicator = String.fromCodePoint(0x1F1E6 + offset)
  emojis.push({
    keyword: `regional_indicator_${letter}`,
    value: indicator,
  })
}
// Write the emoji list as pretty-printed JSON (2-space indent).
// A failed write is reported on stderr and sets a non-zero exit code, so a
// caller (shell script, CI job) can tell that the file was not produced.
fs.writeFile(emojiFilename, JSON.stringify(emojis, null, 2), 'utf8', (err) => {
  if (err) {
    console.error('Error writing file', err)
    process.exitCode = 1
  }
})