Johann150
b94aeb2df2
Some checks failed
ci/woodpecker/push/lint-sw Pipeline failed
ci/woodpecker/push/lint-backend Pipeline failed
ci/woodpecker/push/lint-foundkey-js Pipeline was successful
ci/woodpecker/push/lint-client Pipeline failed
ci/woodpecker/push/build Pipeline was successful
ci/woodpecker/push/test Pipeline failed
Changelog: Fixed
281 lines
6.7 KiB
TypeScript
281 lines
6.7 KiB
TypeScript
import { URL } from 'node:url';
|
|
import * as parse5 from 'parse5';
|
|
import * as TreeAdapter from 'parse5/dist/tree-adapters/default';
|
|
|
|
// parse5's default tree adapter; fromHtml uses its isTextNode / isElementNode
// predicates to tell node kinds apart while walking the parsed fragment.
const { defaultTreeAdapter: treeAdapter } = parse5;
|
|
|
|
/**
 * Matches an `http://` or `https://` URL made up solely of the ASCII characters
 * in the class below, anchored at the START of the string only (anything may
 * follow the matched prefix).
 */
const urlRegex = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+/;

/**
 * Same pattern as `urlRegex`, but anchored at both ends: the WHOLE string has
 * to be such a URL. Built from `urlRegex.source` so the two character classes
 * cannot drift apart when one of them is edited.
 */
const urlRegexFull = new RegExp(`${urlRegex.source}$`);
|
|
|
|
/**
 * Looks up the value of the attribute named `attr` on `node`.
 *
 * @param node - Any node of the parsed tree.
 * @param attr - Attribute name to look for (compared with `===`).
 * @returns The value of the first attribute with that name, or `undefined`
 *   when the node has no such attribute or carries no attribute list at all.
 */
function getAttr(node: TreeAdapter.Node, attr: string): string | undefined {
	// Nodes without an `attrs` list (e.g. text nodes) cannot have the attribute;
	// without this guard `.find` would be called on `undefined` and throw.
	if (!('attrs' in node)) return undefined;

	return node.attrs.find(({ name }) => name === attr)?.value;
}
|
|
/**
 * Tells whether the attribute `attr` of `node`, read as a whitespace-separated
 * token list (the format of `class` and `rel`), contains the token `value`.
 *
 * Tokens are compared whole and case-sensitively, so `tag` does not match
 * `tag-foo` and `value` is never interpreted as a regular expression.
 *
 * @param node - Node whose attribute is inspected.
 * @param attr - Attribute name, e.g. `class` or `rel`.
 * @param value - Token to look for.
 * @returns `true` if the token is present; `false` if it is not, or if the
 *   attribute is missing or empty.
 */
function attrHas(node: TreeAdapter.Node, attr: string, value: string): boolean {
	const attrValue = getAttr(node, attr);
	if (!attrValue) return false;

	// Split on ASCII whitespace, the separator HTML uses for token lists.
	return attrValue.split(/[ \t\n\f\r]+/).includes(value);
}
|
|
|
|
/**
 * Converts an HTML fragment into the lightweight text markup assembled below
 * (`**bold**`, `~~strike~~`, `<i>`, `<small>`, backtick code, `\(...\)` /
 * `\[...\]` math, `[text](url)` links, `> ` quotes, ...).
 *
 * @param html - HTML source; it is parsed as a fragment with parse5.
 * @param quoteUri - When given, a `<span class="quote-inline">` whose trimmed
 *   text is exactly `RE: <quoteUri>` is left out of the output.
 * @returns The converted text with leading and trailing whitespace trimmed.
 */
export function fromHtml(html: string, quoteUri?: string | null): string {
	const dom = parse5.parseFragment(
		// some AP servers like Pixelfed use br tags as well as newlines
		html.replace(/<br\s?\/?>\r?\n/gi, '\n'),
	);

	// Output buffer; `analyze` appends to it while walking the tree.
	let text = '';

	for (const n of dom.childNodes) {
		analyze(n);
	}

	return text.trim();

	/**
	 * Returns the host name of `href`, or `undefined` when `href` cannot be
	 * parsed by `URL` (relative or malformed links). `new URL` throws in that
	 * case, which must not abort the conversion of the whole document.
	 */
	function hostnameOf(href: string): string | undefined {
		try {
			return new URL(href).hostname;
		} catch {
			return undefined;
		}
	}

	/**
	 * Collects the plain text below `node` without applying any markup:
	 * text nodes contribute their value, `<br>` contributes a newline, other
	 * elements contribute the concatenated text of their children, and every
	 * other node kind contributes nothing.
	 */
	function getText(node: TreeAdapter.Node): string {
		if (treeAdapter.isTextNode(node)) return node.value;
		if (!treeAdapter.isElementNode(node)) return '';
		if (node.nodeName === 'br') return '\n';

		// An element without children joins to the empty string.
		return node.childNodes.map(n => getText(n)).join('');
	}

	/** Runs `analyze` on each of the given nodes, in order. */
	function appendChildren(childNodes: TreeAdapter.ChildNode[]): void {
		for (const n of childNodes) {
			analyze(n);
		}
	}

	/** Appends the text representation of `node` (and its subtree) to `text`. */
	function analyze(node: TreeAdapter.Node): void {
		if (treeAdapter.isTextNode(node)) {
			text += node.value;
			return;
		}

		// Skip comment or document type node
		if (!treeAdapter.isElementNode(node)) return;

		switch (node.nodeName) {
			case 'br': {
				text += '\n';
				break;
			}

			case 'a': {
				// trim spaces away, because some AP servers (app.wafrn.net) send strange
				// zero width non-break space in strange places and things like that
				const txt = getText(node).trim();
				const href = getAttr(node, 'href');

				// hashtags
				if (txt.startsWith('#') && href && (attrHas(node, 'rel', 'tag') || attrHas(node, 'class', 'hashtag'))) {
					text += txt;
				// mentions: a link that starts with `@` and does not include space
				} else if (txt.startsWith('@') && txt.match(/\s/) == null && !attrHas(node, 'rel', 'me')) {
					const part = txt.split('@');

					if (part.length === 2 && href) {
						// `@user` only: restore the host name part from the link target.
						// An href that cannot be parsed is handled like a missing href
						// (nothing is emitted) instead of throwing out of fromHtml.
						const host = hostnameOf(href);
						if (host !== undefined) {
							text += `${txt}@${host}`;
						}
					} else if (part.length === 3) {
						// `@user@host`: already complete
						text += txt;
					}
				// other
				} else {
					const generateLink = (): string => {
						if (!href && !txt) {
							return '';
						}
						if (!href) {
							return txt;
						}
						if (!txt || txt === href) { // #6383: Missing text node
							// bare URL; wrap it in <> unless it consists only of the
							// characters accepted by urlRegexFull
							return href.match(urlRegexFull) ? href : `<${href}>`;
						}
						if (href.match(urlRegex) && !href.match(urlRegexFull)) {
							return `[${txt}](<${href}>)`; // #6846
						}
						return `[${txt}](${href})`;
					};

					text += generateLink();
				}
				break;
			}

			case 'h1': {
				text += '【';
				appendChildren(node.childNodes);
				text += '】\n';
				break;
			}

			case 'b':
			case 'strong': {
				text += '**';
				appendChildren(node.childNodes);
				text += '**';
				break;
			}

			case 'small': {
				text += '<small>';
				appendChildren(node.childNodes);
				text += '</small>';
				break;
			}

			case 's':
			case 'del': {
				text += '~~';
				appendChildren(node.childNodes);
				text += '~~';
				break;
			}

			case 'i':
			case 'em': {
				text += '<i>';
				appendChildren(node.childNodes);
				text += '</i>';
				break;
			}

			// block code (<pre><code>)
			case 'pre': {
				if (node.childNodes.length === 1 && node.childNodes[0].nodeName === 'code') {
					text += '\n```\n';
					text += getText(node.childNodes[0]);
					text += '\n```\n';
				} else {
					appendChildren(node.childNodes);
				}
				break;
			}

			// inline code (<code>)
			case 'code': {
				text += '`';
				appendChildren(node.childNodes);
				text += '`';
				break;
			}

			// inline or block KaTeX
			case 'math': {
				// This node should contain <semantics>[...]<annotation/>[...]</semantics> tag with the "source code".
				if (node.childNodes.length !== 1 || node.childNodes[0].nodeName !== 'semantics') {
					break;
				}
				const semantics = node.childNodes[0];

				// only select well formed annotations: exactly one child, which is a text node
				const annotations = semantics.childNodes
					.filter(child =>
						child.nodeName === 'annotation'
						&& child.childNodes.length === 1
						&& child.childNodes[0].nodeName === '#text'
					);
				if (annotations.length === 0) {
					break;
				}

				let annotation = annotations[0];
				// try to prefer a TeX annotation if there are multiple annotations
				const filteredAnnotations = annotations.filter(child => child.attrs.some(attribute => attribute.name === 'encoding' && attribute.value === 'application/x-tex'));
				if (filteredAnnotations.length > 0) {
					annotation = filteredAnnotations[0];
				}

				const formula = annotation.childNodes[0].value;
				if (annotation.attrs.some(attribute => attribute.name === 'encoding' && attribute.value === 'application/x-tex')) {
					// can be rendered as KaTeX, now decide if it is possible to render as inline or not
					if (/[\r\n]/.test(formula)) {
						// line break, this must be rendered as a block
						text += '\n\\[' + formula + '\\]\n';
					} else {
						// render as inline
						text += '\\(' + formula + '\\)';
					}
				} else {
					// not KaTeX, but if there is a plaintext annotation it can still be rendered as code
					if (/[\r\n]/.test(formula)) {
						// line break, this must be rendered as a block
						text += '\n```\n' + formula + '\n```\n';
					} else {
						// render as inline
						text += '`' + formula + '`';
					}
				}
				break;
			}

			case 'blockquote': {
				// Quotes are flattened to plain text; every line gets a `> ` prefix.
				const t = getText(node);
				if (t) {
					text += '\n> ';
					text += t.split('\n').join('\n> ');
				}
				break;
			}

			case 'p':
			case 'h2':
			case 'h3':
			case 'h4':
			case 'h5':
			case 'h6': {
				text += '\n\n';
				appendChildren(node.childNodes);
				break;
			}

			// other block elements
			case 'div':
			case 'header':
			case 'footer':
			case 'article':
			case 'li':
			case 'dt':
			case 'dd': {
				text += '\n';
				appendChildren(node.childNodes);
				break;
			}

			case 'span': {
				// embedded quote thingy for backwards compatibility, don't show it
				const isEmbeddedQuote = attrHas(node, 'class', 'quote-inline') && quoteUri && getText(node).trim() === `RE: ${quoteUri}`;
				if (!isEmbeddedQuote) {
					appendChildren(node.childNodes);
				}
				break;
			}

			default: { // includes inline elements
				appendChildren(node.childNodes);
				break;
			}
		}
	}
}
|