server: ol/ul for html->mfm
Implements a simple rendering for ordered and unordered lists. To properly support nested lists, as well as lists inside of blockquotes, the `analyze` function had to be rewritten so that it returns the rendered text instead of appending it to a shared buffer. As a side effect, markup inside of blockquotes is now parsed properly as well.
This commit is contained in:
parent
2ed5ecd6a1
commit
650869e2f7
1 changed files with 92 additions and 82 deletions
|
@ -23,13 +23,14 @@ export function fromHtml(html: string, quoteUri?: string | null): string {
|
|||
html.replace(/<br\s?\/?>\r?\n/gi, '\n'),
|
||||
);
|
||||
|
||||
let text = '';
|
||||
// stack of the lists currently being parsed, one entry per nesting level (empty when not inside any list).
|
||||
// 0 for a level that is an unordered list, otherwise the counter for the ordered list
|
||||
let listIndex: number[] = [];
|
||||
|
||||
for (const n of dom.childNodes) {
|
||||
analyze(n);
|
||||
}
|
||||
|
||||
return text.trim();
|
||||
return dom.childNodes
|
||||
.map(analyze)
|
||||
.join('')
|
||||
.trim();
|
||||
|
||||
function getText(node: TreeAdapter.Node): string {
|
||||
if (treeAdapter.isTextNode(node)) return node.value;
|
||||
|
@ -43,60 +44,55 @@ export function fromHtml(html: string, quoteUri?: string | null): string {
|
|||
return '';
|
||||
}
|
||||
|
||||
function appendChildren(childNodes: TreeAdapter.ChildNode[]): void {
|
||||
if (childNodes.length > 0) {
|
||||
for (const n of childNodes) {
|
||||
analyze(n);
|
||||
}
|
||||
}
|
||||
function analyzeMultiple(childNodes: TreeAdapter.ChildNode[]): string {
|
||||
return childNodes.map(analyze).join('');
|
||||
}
|
||||
|
||||
function analyze(node: TreeAdapter.Node): void {
|
||||
function analyze(node: TreeAdapter.Node): string {
|
||||
if (treeAdapter.isTextNode(node)) {
|
||||
text += node.value;
|
||||
return;
|
||||
return node.value;
|
||||
}
|
||||
|
||||
// Skip comment or document type node
|
||||
if (!treeAdapter.isElementNode(node)) return;
|
||||
if (!treeAdapter.isElementNode(node)) return '';
|
||||
|
||||
switch (node.nodeName) {
|
||||
case 'br': {
|
||||
text += '\n';
|
||||
break;
|
||||
return '\n';
|
||||
}
|
||||
|
||||
case 'a':
|
||||
{
|
||||
let text = '';
|
||||
// trim spaces away, because some AP servers (app.wafrn.net) send strange
|
||||
// zero width non-break space in strange places and things like that
|
||||
const txt = getText(node).trim();
|
||||
const linkText = getText(node).trim();
|
||||
const href = getAttr(node, 'href');
|
||||
|
||||
// hashtags
|
||||
if (txt.startsWith('#') && href && (attrHas(node, 'rel', 'tag') || attrHas(node, 'class', 'hashtag'))) {
|
||||
text += txt;
|
||||
if (linkText.startsWith('#') && href && (attrHas(node, 'rel', 'tag') || attrHas(node, 'class', 'hashtag'))) {
|
||||
text += linkText;
|
||||
// mentions: a link that starts with `@` and does not include space
|
||||
} else if (txt.startsWith('@') && txt.match(/\s/) == null && !attrHas(node, 'rel', 'me')) {
|
||||
const part = txt.split('@');
|
||||
} else if (linkText.startsWith('@') && linkText.match(/\s/) == null && !attrHas(node, 'rel', 'me')) {
|
||||
const part = linkText.split('@');
|
||||
|
||||
if (part.length === 2 && href) {
|
||||
// restore the host name part
|
||||
const acct = `${txt}@${(new URL(href)).hostname}`;
|
||||
const acct = `${linkText}@${(new URL(href)).hostname}`;
|
||||
text += acct;
|
||||
} else if (part.length === 3) {
|
||||
text += txt;
|
||||
text += linkText;
|
||||
}
|
||||
// other
|
||||
} else {
|
||||
const generateLink = () => {
|
||||
if (!href && !txt) {
|
||||
if (!href && !linkText) {
|
||||
return '';
|
||||
}
|
||||
if (!href) {
|
||||
return txt;
|
||||
return linkText;
|
||||
}
|
||||
if (!txt || txt === href) { // #6383: Missing text node
|
||||
if (!linkText || linkText === href) { // #6383: Missing text node
|
||||
if (href.match(urlRegexFull)) {
|
||||
return href;
|
||||
} else {
|
||||
|
@ -104,78 +100,57 @@ export function fromHtml(html: string, quoteUri?: string | null): string {
|
|||
}
|
||||
}
|
||||
if (href.match(urlRegex) && !href.match(urlRegexFull)) {
|
||||
return `[${txt}](<${href}>)`; // #6846
|
||||
return `[${linkText}](<${href}>)`; // #6846
|
||||
} else {
|
||||
return `[${txt}](${href})`;
|
||||
return `[${linkText}](${href})`;
|
||||
}
|
||||
};
|
||||
|
||||
text += generateLink();
|
||||
}
|
||||
break;
|
||||
return text;
|
||||
}
|
||||
|
||||
case 'h1':
|
||||
{
|
||||
text += '【';
|
||||
appendChildren(node.childNodes);
|
||||
text += '】\n';
|
||||
break;
|
||||
return '【' + analyzeMultiple(node.childNodes) + '】\n';
|
||||
}
|
||||
|
||||
case 'b':
|
||||
case 'strong':
|
||||
{
|
||||
text += '**';
|
||||
appendChildren(node.childNodes);
|
||||
text += '**';
|
||||
break;
|
||||
return '**' + analyzeMultiple(node.childNodes) + '**';
|
||||
}
|
||||
|
||||
case 'small':
|
||||
{
|
||||
text += '<small>';
|
||||
appendChildren(node.childNodes);
|
||||
text += '</small>';
|
||||
break;
|
||||
return '<small>' + analyzeMultiple(node.childNodes) + '</small>';
|
||||
}
|
||||
|
||||
case 's':
|
||||
case 'del':
|
||||
{
|
||||
text += '~~';
|
||||
appendChildren(node.childNodes);
|
||||
text += '~~';
|
||||
break;
|
||||
return '~~' + analyzeMultiple(node.childNodes) + '~~';
|
||||
}
|
||||
|
||||
case 'i':
|
||||
case 'em':
|
||||
{
|
||||
text += '<i>';
|
||||
appendChildren(node.childNodes);
|
||||
text += '</i>';
|
||||
break;
|
||||
return '<i>' + analyzeMultiple(node.childNodes) + '</i>';
|
||||
}
|
||||
|
||||
// block code (<pre><code>)
|
||||
case 'pre': {
|
||||
if (node.childNodes.length === 1 && node.childNodes[0].nodeName === 'code') {
|
||||
text += '\n```\n';
|
||||
text += getText(node.childNodes[0]);
|
||||
text += '\n```\n';
|
||||
return '\n```\n' + getText(node.childNodes[0]) + '\n```\n';
|
||||
} else {
|
||||
appendChildren(node.childNodes);
|
||||
return analyzeMultiple(node.childNodes);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// inline code (<code>)
|
||||
case 'code': {
|
||||
text += '`';
|
||||
appendChildren(node.childNodes);
|
||||
text += '`';
|
||||
break;
|
||||
return '`' + analyzeMultiple(node.childNodes) + '`';
|
||||
}
|
||||
|
||||
// inline or block KaTeX
|
||||
|
@ -207,31 +182,27 @@ export function fromHtml(html: string, quoteUri?: string | null): string {
|
|||
// can be rendered as KaTeX, now decide if it is possible to render as inline or not
|
||||
if (/[\r\n]/.test(formula)) {
|
||||
// line break, this must be rendered as a block
|
||||
text += '\n\\[' + formula + '\\]\n';
|
||||
return '\n\\[' + formula + '\\]\n';
|
||||
} else {
|
||||
// render as inline
|
||||
text += '\\(' + formula + '\\)';
|
||||
return '\\(' + formula + '\\)';
|
||||
}
|
||||
} else {
|
||||
// not KaTeX, but if there is a plaintext annotation it can still be rendered as code
|
||||
if (/[\r\n]/.test(formula)) {
|
||||
// line break, this must be rendered as a block
|
||||
text += '\n```\n' + formula + '\n```\n';
|
||||
return '\n```\n' + formula + '\n```\n';
|
||||
} else {
|
||||
// render as inline
|
||||
text += '`' + formula + '`';
|
||||
return '`' + formula + '`';
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case 'blockquote': {
|
||||
const t = getText(node);
|
||||
if (t) {
|
||||
text += '\n> ';
|
||||
text += t.split('\n').join('\n> ');
|
||||
}
|
||||
break;
|
||||
return analyzeMultiple(node.childNodes)
|
||||
.trim()
|
||||
.replace(/^|\n/g, '\n>');
|
||||
}
|
||||
|
||||
case 'p':
|
||||
|
@ -241,9 +212,50 @@ export function fromHtml(html: string, quoteUri?: string | null): string {
|
|||
case 'h5':
|
||||
case 'h6':
|
||||
{
|
||||
text += '\n\n';
|
||||
appendChildren(node.childNodes);
|
||||
break;
|
||||
return '\n\n' + analyzeMultiple(node.childNodes);
|
||||
}
|
||||
|
||||
// lists and list items
|
||||
case 'ol':
|
||||
case 'ul':
|
||||
{
|
||||
if (node.nodeName == 'ol') {
|
||||
listIndex.push(1);
|
||||
} else {
|
||||
listIndex.push(0);
|
||||
}
|
||||
let text = '\n' + analyzeMultiple(node.childNodes);
|
||||
listIndex.pop();
|
||||
return text;
|
||||
}
|
||||
|
||||
case 'li':
|
||||
{
|
||||
if (listIndex.length == 0) {
|
||||
break;
|
||||
}
|
||||
let text = '\n';
|
||||
|
||||
// pop the index of the list currently being operated on, so it can be manipulated
|
||||
let index = listIndex.pop();
|
||||
// indent the start of the list item according to the level of
|
||||
// nesting of lists
|
||||
//
|
||||
// since we popped the current index, the length will be 0 on
|
||||
// the first level, thus causing no indent on the first level
|
||||
text += ' '.repeat(listIndex.length);
|
||||
if (index == 0) {
|
||||
text += '- ';
|
||||
} else {
|
||||
text += index + ') ';
|
||||
index++;
|
||||
}
|
||||
// done with the index, put it back so nested lists with
|
||||
// analyzeMultiple will work correctly
|
||||
listIndex.push(index);
|
||||
|
||||
text += analyzeMultiple(node.childNodes);
|
||||
return text;
|
||||
}
|
||||
|
||||
// other block elements
|
||||
|
@ -251,30 +263,28 @@ export function fromHtml(html: string, quoteUri?: string | null): string {
|
|||
case 'header':
|
||||
case 'footer':
|
||||
case 'article':
|
||||
case 'li':
|
||||
case 'dt':
|
||||
case 'dd':
|
||||
{
|
||||
text += '\n';
|
||||
appendChildren(node.childNodes);
|
||||
break;
|
||||
return '\n' + analyzeMultiple(node.childNodes);
|
||||
}
|
||||
|
||||
case 'span':
|
||||
{
|
||||
if (attrHas(node, 'class', 'quote-inline') && quoteUri && getText(node).trim() === `RE: ${quoteUri}`) {
|
||||
// embedded quote thingy for backwards compatibility, don't show it
|
||||
return '';
|
||||
} else {
|
||||
appendChildren(node.childNodes);
|
||||
return analyzeMultiple(node.childNodes);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default: // includes inline elements
|
||||
{
|
||||
appendChildren(node.childNodes);
|
||||
break;
|
||||
return analyzeMultiple(node.childNodes);
|
||||
}
|
||||
}
|
||||
|
||||
return '';
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue