server: refactor fromHtml attribute handling

Also try to recognize Owncast hashtag links.
This commit is contained in:
Johann150 2022-12-04 03:43:22 +01:00
parent cda9197700
commit 97052b1f61
Signed by untrusted user: Johann150
GPG key ID: 9EE6577A2A06F8F1

View file

@ -7,6 +7,16 @@ const treeAdapter = parse5.defaultTreeAdapter;
const urlRegex = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+/; const urlRegex = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+/;
const urlRegexFull = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+$/; const urlRegexFull = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+$/;
/**
 * Look up the value of an attribute on a parsed node.
 *
 * @param node - Node to inspect. A node that carries no `attrs` array is
 *   tolerated and treated as having no attributes.
 * @param attr - Attribute name, compared with strict equality against each
 *   entry's `name`.
 * @returns The value of the first attribute with that name, or `undefined`
 *   when the node has no such attribute.
 */
function getAttr(node: TreeAdapter.Node, attr: string): string | undefined {
	// Guard so that nodes without an attribute list yield `undefined`
	// instead of throwing a TypeError on `.find`.
	if (!('attrs' in node) || !Array.isArray(node.attrs)) return undefined;
	return node.attrs.find(({ name }) => name === attr)?.value;
}
/**
 * Check whether a whitespace-separated token-list attribute (such as `rel`
 * or `class`) contains the given token.
 *
 * Tokens are compared as whole strings and case-sensitively. This avoids
 * the false positives of a `\b`-based regular expression, where `-` counts
 * as a word boundary (so `me` would match `not-me`), and it avoids
 * interpreting regex metacharacters contained in `value`.
 *
 * @param node - Node whose attribute is inspected.
 * @param attr - Name of the attribute holding the token list.
 * @param value - Token to look for.
 * @returns `true` if the attribute exists and one of its tokens equals
 *   `value`; `false` otherwise (including a missing or empty attribute).
 */
function attrHas(node: TreeAdapter.Node, attr: string, value: string): boolean {
	const attrValue = getAttr(node, attr);
	if (!attrValue) return false;
	// Split on ASCII whitespace; drop the empty strings that leading or
	// trailing whitespace would otherwise produce.
	const tokens = attrValue.split(/[ \t\n\f\r]+/).filter((token) => token !== '');
	return tokens.includes(value);
}
export function fromHtml(html: string, quoteUri?: string | null): string { export function fromHtml(html: string, quoteUri?: string | null): string {
const dom = parse5.parseFragment( const dom = parse5.parseFragment(
// some AP servers like Pixelfed use br tags as well as newlines // some AP servers like Pixelfed use br tags as well as newlines
@ -59,19 +69,18 @@ export function fromHtml(html: string, quoteUri?: string | null): string {
case 'a': case 'a':
{ {
const txt = getText(node); const txt = getText(node);
const rel = node.attrs.find(x => x.name === 'rel'); const href = getAttr(node, 'href');
const href = node.attrs.find(x => x.name === 'href');
// hashtags // hashtags
if (txt.startsWith('#') && href && /\btag\b/.test(rel?.value)) { if (txt.startsWith('#') && href && (attrHas(node, 'rel', 'tag') || attrHas(node, 'class', 'hashtag')) {
text += txt; text += txt;
// mentions // mentions
} else if (txt.startsWith('@') && !(rel && rel.value.match(/^me /))) { } else if (txt.startsWith('@') && !attrHas(node, 'rel', 'me')) {
const part = txt.split('@'); const part = txt.split('@');
if (part.length === 2 && href) { if (part.length === 2 && href) {
// restore the host name part // restore the host name part
const acct = `${txt}@${(new URL(href.value)).hostname}`; const acct = `${txt}@${(new URL(href)).hostname}`;
text += acct; text += acct;
} else if (part.length === 3) { } else if (part.length === 3) {
text += txt; text += txt;
@ -85,17 +94,17 @@ export function fromHtml(html: string, quoteUri?: string | null): string {
if (!href) { if (!href) {
return txt; return txt;
} }
if (!txt || txt === href.value) { // #6383: Missing text node if (!txt || txt === href) { // #6383: Missing text node
if (href.value.match(urlRegexFull)) { if (href.match(urlRegexFull)) {
return href.value; return href;
} else { } else {
return `<${href.value}>`; return `<${href}>`;
} }
} }
if (href.value.match(urlRegex) && !href.value.match(urlRegexFull)) { if (href.match(urlRegex) && !href.match(urlRegexFull)) {
return `[${txt}](<${href.value}>)`; // #6846 return `[${txt}](<${href}>)`; // #6846
} else { } else {
return `[${txt}](${href.value})`; return `[${txt}](${href})`;
} }
}; };
@ -204,8 +213,7 @@ export function fromHtml(html: string, quoteUri?: string | null): string {
case 'span': case 'span':
{ {
const nodeClass = node.attrs.find(({ name }) => name === 'class')?.value; if (attrHas(node, 'class', 'quote-inline') && quoteUri && getText(node).trim() === `RE: ${quoteUri}`) {
if (/\bquote-inline\b/.test(nodeClass) && quoteUri && getText(node).trim() === `RE: ${quoteUri}`) {
// embedded quote thingy for backwards compatibility, don't show it // embedded quote thingy for backwards compatibility, don't show it
} else { } else {
appendChildren(node.childNodes); appendChildren(node.childNodes);