From 20ce6468520e76b0fb2931a5fac368157d950b1d Mon Sep 17 00:00:00 2001 From: Henry Jameson Date: Mon, 7 Jun 2021 03:14:48 +0300 Subject: [PATCH 01/93] [WIP] MUCH better approach to replacing emojis with still versions --- .babelrc | 4 +- package.json | 4 +- src/components/rich_content/rich_content.jsx | 66 +++++++++ src/components/rich_content/rich_content.scss | 0 .../status_content/status_content.js | 6 +- .../status_content/status_content.vue | 14 +- .../entity_normalizer.service.js | 3 + .../mini_html_converter.service.js | 137 ++++++++++++++++++ .../mini_post_html_processor.spec.js | 130 +++++++++++++++++ 9 files changed, 350 insertions(+), 14 deletions(-) create mode 100644 src/components/rich_content/rich_content.jsx create mode 100644 src/components/rich_content/rich_content.scss create mode 100644 src/services/mini_html_converter/mini_html_converter.service.js create mode 100644 test/unit/specs/services/tiny_post_html_processor/mini_post_html_processor.spec.js diff --git a/.babelrc b/.babelrc index 3c732dd1..94521147 100644 --- a/.babelrc +++ b/.babelrc @@ -1,5 +1,5 @@ { - "presets": ["@babel/preset-env"], - "plugins": ["@babel/plugin-transform-runtime", "lodash", "@vue/babel-plugin-transform-vue-jsx"], + "presets": ["@babel/preset-env", "@vue/babel-preset-jsx"], + "plugins": ["@babel/plugin-transform-runtime", "lodash"], "comments": false } diff --git a/package.json b/package.json index 99301266..5134a8b1 100644 --- a/package.json +++ b/package.json @@ -47,8 +47,8 @@ "@babel/preset-env": "^7.7.6", "@babel/register": "^7.7.4", "@ungap/event-target": "^0.1.0", - "@vue/babel-helper-vue-jsx-merge-props": "^1.0.0", - "@vue/babel-plugin-transform-vue-jsx": "^1.1.2", + "@vue/babel-helper-vue-jsx-merge-props": "^1.2.1", + "@vue/babel-preset-jsx": "^1.2.4", "@vue/test-utils": "^1.0.0-beta.26", "autoprefixer": "^6.4.0", "babel-eslint": "^7.0.0", diff --git a/src/components/rich_content/rich_content.jsx b/src/components/rich_content/rich_content.jsx new file mode 100644 index 00000000..3b29eb4c --- /dev/null +++ b/src/components/rich_content/rich_content.jsx @@ -0,0 +1,66 @@ +import Vue from 'vue' +import { mapGetters } from 'vuex' +import { processHtml } from 'src/services/tiny_post_html_processor/tiny_post_html_processor.service.js' +import { convertHtml, getTagName, processTextForEmoji, getAttrs } from 'src/services/mini_html_converter/mini_html_converter.service.js' +import { mentionMatchesUrl, extractTagFromUrl } from 'src/services/matcher/matcher.service.js' +import generateProfileLink from 'src/services/user_profile_link_generator/user_profile_link_generator' +import StillImage from 'src/components/still-image/still-image.vue' + +import './rich_content.scss' + +export default Vue.component('RichContent', { + name: 'RichContent', + props: { + html: { + required: true, + type: String + }, + emoji: { + required: true, + type: Array + } + }, + render (h) { + const renderImage = (tag) => { + return + } + const structure = convertHtml(this.html) + const processItem = (item) => { + if (typeof item === 'string') { + if (item.includes(':')) { + return processTextForEmoji( + item, + this.emoji, + ({ shortcode, url }) => { + return + } + ) + } else { + return item + } + } + if (Array.isArray(item)) { + const [opener, children] = item + const Tag = getTagName(opener) + if (Tag === 'img') { + return renderImage(opener) + } + if (children !== undefined) { + return + { children.map(processItem) } + + } else { + return + } + } + } + return
+ { structure.map(processItem) } +
+ } +}) diff --git a/src/components/rich_content/rich_content.scss b/src/components/rich_content/rich_content.scss new file mode 100644 index 00000000..e69de29b diff --git a/src/components/status_content/status_content.js b/src/components/status_content/status_content.js index a6f79d76..571f1a78 100644 --- a/src/components/status_content/status_content.js +++ b/src/components/status_content/status_content.js @@ -1,6 +1,7 @@ import Attachment from '../attachment/attachment.vue' import Poll from '../poll/poll.vue' import Gallery from '../gallery/gallery.vue' +import RichContent from 'src/components/rich_content/rich_content.jsx' import LinkPreview from '../link-preview/link-preview.vue' import generateProfileLink from 'src/services/user_profile_link_generator/user_profile_link_generator' import fileType from 'src/services/file_type/file_type.service' @@ -125,7 +126,7 @@ const StatusContent = { return this.mergedConfig.maxThumbnails }, postBodyHtml () { - const html = this.status.statusnet_html + const html = this.status.raw_html if (this.mergedConfig.greentext) { try { @@ -164,7 +165,8 @@ const StatusContent = { Attachment, Poll, Gallery, - LinkPreview + LinkPreview, + RichContent }, methods: { linkClicked (event) { diff --git a/src/components/status_content/status_content.vue b/src/components/status_content/status_content.vue index 90bfaf40..c1a78db9 100644 --- a/src/components/status_content/status_content.vue +++ b/src/components/status_content/status_content.vue @@ -1,5 +1,4 @@ diff --git a/src/services/entity_normalizer/entity_normalizer.service.js b/src/services/entity_normalizer/entity_normalizer.service.js index a4ddf927..9f63feb6 100644 --- a/src/services/entity_normalizer/entity_normalizer.service.js +++ b/src/services/entity_normalizer/entity_normalizer.service.js @@ -267,6 +267,8 @@ export const parseStatus = (data) => { output.nsfw = data.sensitive output.statusnet_html = addEmojis(data.content, data.emojis) + output.raw_html = data.content + output.emojis = data.emojis output.tags = data.tags @@ -293,6 +295,7 @@ export const parseStatus = (data) => { output.retweeted_status = parseStatus(data.reblog) } + output.summary_raw_html = escape(data.spoiler_text) output.summary_html = addEmojis(escape(data.spoiler_text), data.emojis) output.external_url = data.url output.poll = data.poll diff --git a/src/services/mini_html_converter/mini_html_converter.service.js b/src/services/mini_html_converter/mini_html_converter.service.js new file mode 100644 index 00000000..00d20019 --- /dev/null +++ b/src/services/mini_html_converter/mini_html_converter.service.js @@ -0,0 +1,137 @@ +/** + * This is a not-so-tiny purpose-built HTML parser/processor. It was made for use + * with StatusText component for purpose of replacing tags with vue components + * + * known issue: doesn't handle CDATA so nested CDATA might not work well + * + * @param {Object} input - input data + * @param {(string) => string} lineProcessor - function that will be called on every line + * @param {{ key[string]: (string) => string}} tagProcessor - map of processors for tags + * @return {string} processed html + */ +export const convertHtml = (html) => { + // Elements that are implicitly self-closing + // https://developer.mozilla.org/en-US/docs/Glossary/empty_element + const emptyElements = new Set([ + 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', + 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr' + ]) + // TODO For future - also parse HTML5 multi-source components? + + const buffer = [] // Current output buffer + const levels = [['', buffer]] // How deep we are in tags and which tags were there + let textBuffer = '' // Current line content + let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag + + const getCurrentBuffer = () => { + return levels[levels.length - 1][1] + } + + const flushText = () => { // Processes current line buffer, adds it to output buffer and clears line buffer + if (textBuffer === '') return + getCurrentBuffer().push(textBuffer) + textBuffer = '' + } + + const handleSelfClosing = (tag) => { + getCurrentBuffer().push([tag]) + } + + const handleOpen = (tag) => { + const curBuf = getCurrentBuffer() + const newLevel = [tag, []] + levels.push(newLevel) + curBuf.push(newLevel) + } + + const handleClose = (tag) => { + const currentTag = levels[levels.length - 1] + if (getTagName(levels[levels.length - 1][0]) === getTagName(tag)) { + currentTag.push(tag) + levels.pop() + } else { + getCurrentBuffer().push(tag) + } + } + + for (let i = 0; i < html.length; i++) { + const char = html[i] + if (char === '<' && tagBuffer === null) { + flushText() + tagBuffer = char + } else if (char !== '>' && tagBuffer !== null) { + tagBuffer += char + } else if (char === '>' && tagBuffer !== null) { + tagBuffer += char + const tagFull = tagBuffer + tagBuffer = null + const tagName = getTagName(tagFull) + if (tagFull[1] === '/') { + handleClose(tagFull) + } else if (emptyElements.has(tagName) || tagFull[tagFull.length - 2] === '/') { + // self-closing + handleSelfClosing(tagFull) + } else { + handleOpen(tagFull) + } + } else { + textBuffer += char + } + } + if (tagBuffer) { + textBuffer += tagBuffer + } + + flushText() + return buffer +} + +// Extracts tag name from tag, i.e. => span +export const getTagName = (tag) => { + const result = /(?:<\/(\w+)>|<(\w+)\s?.*?\/?>)/gi.exec(tag) + return result && (result[1] || result[2]) +} + +export const processTextForEmoji = (text, emojis, processor) => { + const buffer = [] + let textBuffer = '' + for (let i = 0; i < text.length; i++) { + const char = text[i] + if (char === ':') { + const next = text.slice(i + 1) + let found = false + for (let emoji of emojis) { + if (next.slice(0, emoji.shortcode.length + 1) === (emoji.shortcode + ':')) { + found = emoji + break + } + } + if (found) { + buffer.push(textBuffer) + textBuffer = '' + buffer.push(processor(found)) + i += found.shortcode.length + 1 + } else { + textBuffer += char + } + } else { + textBuffer += char + } + } + return buffer +} + +export const getAttrs = tag => { + const innertag = tag + .substring(1, tag.length - 1) + .replace(new RegExp('^' + getTagName(tag)), '') + .replace(/\/?$/, '') + .trim() + const attrs = Array.from(innertag.matchAll(/([a-z0-9-]+)(?:=(?:"([^"]+?)"|'([^']+?)'))?/gi)) + .map(([trash, key, value]) => [key, value]) + .map(([k, v]) => { + if (!v) return [k, true] + return [k, v] + }) + return Object.fromEntries(attrs) +} diff --git a/test/unit/specs/services/tiny_post_html_processor/mini_post_html_processor.spec.js b/test/unit/specs/services/tiny_post_html_processor/mini_post_html_processor.spec.js new file mode 100644 index 00000000..41818f57 --- /dev/null +++ b/test/unit/specs/services/tiny_post_html_processor/mini_post_html_processor.spec.js @@ -0,0 +1,130 @@ +import { convertHtml, processTextForEmoji } from 'src/services/mini_html_converter/mini_html_converter.service.js' + +describe('MiniHtmlConverter', () => { + describe('convertHtml', () => { + it('converts html into a tree structure', () => { + const inputOutput = '1

2

345' + expect(convertHtml(inputOutput)).to.eql([ + '1 ', + [ + '

', + ['2'], + '

' + ], + ' ', + [ + '', + [ + '3', + [''], + '4' + ], + '' + ], + '5' + ]) + }) + it('converts html to tree while preserving tag formatting', () => { + const inputOutput = '1

2

345' + expect(convertHtml(inputOutput)).to.eql([ + '1 ', + [ + '

', + ['2'], + '

' + ], + [ + '', + [ + '3', + [''], + '4' + ], + '' + ], + '5' + ]) + }) + it('converts semi-broken html', () => { + const inputOutput = '1
2

42' + expect(convertHtml(inputOutput)).to.eql([ + '1 ', + ['
'], + ' 2 ', + [ + '

', + [' 42'] + ] + ]) + }) + it('realistic case', () => { + const inputOutput = '

@benis @hj nice

' + expect(convertHtml(inputOutput)).to.eql([ + [ + '

', + [ + [ + '', + [ + [ + '', + [ + '@', + [ + '', + [ + 'benis' + ], + '' + ] + ], + '' + ] + ], + '' + ], + ' ', + [ + '', + [ + [ + '', + [ + '@', + [ + '', + [ + 'hj' + ], + '' + ] + ], + '' + ] + ], + '' + ], + ' nice' + ], + '

' + ] + ]) + }) + }) + describe('processTextForEmoji', () => { + it('processes all emoji in text', () => { + const inputOutput = 'Hello from finland! :lol: We have best water! :lmao:' + const emojis = [ + { shortcode: 'lol', src: 'LOL' }, + { shortcode: 'lmao', src: 'LMAO' } + ] + const processor = ({ shortcode, src }) => ({ shortcode, src }) + expect(processTextForEmoji(inputOutput, emojis, processor)).to.eql([ + 'Hello from finland! ', + { shortcode: 'lol', src: 'LOL' }, + ' We have best water! ', + { shortcode: 'lmao', src: 'LMAO' } + ]) + }) + }) +}) From 5970ddf9ac5ed7b7a855ed4c025a2f5f62e256fa Mon Sep 17 00:00:00 2001 From: Henry Jameson Date: Mon, 7 Jun 2021 12:22:15 +0300 Subject: [PATCH 02/93] fix escaped apostrophes --- src/components/rich_content/rich_content.jsx | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/components/rich_content/rich_content.jsx b/src/components/rich_content/rich_content.jsx index 3b29eb4c..a5c010de 100644 --- a/src/components/rich_content/rich_content.jsx +++ b/src/components/rich_content/rich_content.jsx @@ -1,9 +1,6 @@ import Vue from 'vue' -import { mapGetters } from 'vuex' -import { processHtml } from 'src/services/tiny_post_html_processor/tiny_post_html_processor.service.js' +import { unescape } from 'lodash' import { convertHtml, getTagName, processTextForEmoji, getAttrs } from 'src/services/mini_html_converter/mini_html_converter.service.js' -import { mentionMatchesUrl, extractTagFromUrl } from 'src/services/matcher/matcher.service.js' -import generateProfileLink from 'src/services/user_profile_link_generator/user_profile_link_generator' import StillImage from 'src/components/still-image/still-image.vue' import './rich_content.scss' @@ -29,7 +26,7 @@ export default Vue.component('RichContent', { if (typeof item === 'string') { if (item.includes(':')) { return processTextForEmoji( - item, + unescape(item), this.emoji, ({ shortcode, url }) => { return Date: Mon, 7 Jun 2021 12:24:11 +0300 Subject: [PATCH 03/93] lint --- src/components/emoji_input/emoji_input.vue | 2 +- src/components/status_content/status_content.vue | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/components/emoji_input/emoji_input.vue b/src/components/emoji_input/emoji_input.vue index e6f9a9d3..aa2950ce 100644 --- a/src/components/emoji_input/emoji_input.vue +++ b/src/components/emoji_input/emoji_input.vue @@ -1,9 +1,9 @@