[MFM] Improve parsing of mentions and URLs

Resolve #2779
Resolve #3053
This commit is contained in:
syuilo 2018-11-16 21:57:19 +09:00
parent e3b8495431
commit 0ff390ed80
No known key found for this signature in database
GPG key ID: BDC4C49D06AB9D69
7 changed files with 79 additions and 46 deletions

View file

@ -8,7 +8,9 @@ export type TextElementHashtag = {
hashtag: string; hashtag: string;
}; };
export default function(text: string, isBegin: boolean) { export default function(text: string, before: string) {
const isBegin = before == '';
if (!(/^\s#[^\s\.,!\?#]+/.test(text) || (isBegin && /^#[^\s\.,!\?#]+/.test(text)))) return null; if (!(/^\s#[^\s\.,!\?#]+/.test(text) || (isBegin && /^#[^\s\.,!\?#]+/.test(text)))) return null;
const isHead = text.startsWith('#'); const isHead = text.startsWith('#');
const hashtag = text.match(/^\s?#[^\s\.,!\?#]+/)[0]; const hashtag = text.match(/^\s?#[^\s\.,!\?#]+/)[0];

View file

@ -12,9 +12,10 @@ export type TextElementMention = {
host: string; host: string;
}; };
export default function(text: string) { export default function(text: string, before: string) {
const match = text.match(/^@[a-z0-9_]+(?:@[a-z0-9\.\-]+[a-z0-9])?/i); const match = text.match(/^@[a-z0-9_]+(?:@[a-z0-9\.\-]+[a-z0-9])?/i);
if (!match) return null; if (!match) return null;
if (/[a-zA-Z0-9]$/.test(before)) return null;
const mention = match[0]; const mention = match[0];
const { username, host } = parseAcct(mention.substr(1)); const { username, host } = parseAcct(mention.substr(1));
const canonical = host != null ? `@${username}@${toUnicode(host)}` : mention; const canonical = host != null ? `@${username}@${toUnicode(host)}` : mention;

View file

@ -8,7 +8,9 @@ export type TextElementQuote = {
quote: string; quote: string;
}; };
export default function(text: string, isBegin: boolean) { export default function(text: string, before: string) {
const isBegin = before == '';
const match = text.match(/^"([\s\S]+?)\n"/) || text.match(/^\n>([\s\S]+?)(\n\n|$)/) || const match = text.match(/^"([\s\S]+?)\n"/) || text.match(/^\n>([\s\S]+?)(\n\n|$)/) ||
(isBegin ? text.match(/^>([\s\S]+?)(\n\n|$)/) : null); (isBegin ? text.match(/^>([\s\S]+?)(\n\n|$)/) : null);

View file

@ -8,7 +8,9 @@ export type TextElementTitle = {
title: string; title: string;
}; };
export default function(text: string, isBegin: boolean) { export default function(text: string, before: string) {
const isBegin = before == '';
const match = isBegin ? text.match(/^(【|\[)(.+?)(】|])\n/) : text.match(/^\n(【|\[)(.+?)(】|])\n/); const match = isBegin ? text.match(/^(【|\[)(.+?)(】|])\n/) : text.match(/^\n(【|\[)(.+?)(】|])\n/);
if (!match) return null; if (!match) return null;
return { return {

View file

@ -8,12 +8,13 @@ export type TextElementUrl = {
url: string; url: string;
}; };
export default function(text: string) { export default function(text: string, before: string) {
const match = text.match(/^https?:\/\/[\w\/:%#@\$&\?!\(\)\[\]~\.,=\+\-]+/); const match = text.match(/^https?:\/\/[\w\/:%#@\$&\?!\(\)\[\]~\.,=\+\-]+/);
if (!match) return null; if (!match) return null;
let url = match[0]; let url = match[0];
if (url.endsWith('.')) url = url.substr(0, url.lastIndexOf('.')); if (url.endsWith('.')) url = url.substr(0, url.lastIndexOf('.'));
if (url.endsWith(',')) url = url.substr(0, url.lastIndexOf(',')); if (url.endsWith(',')) url = url.substr(0, url.lastIndexOf(','));
if (url.endsWith(')') && before.endsWith('(')) url = url.substr(0, url.lastIndexOf(')'));
return { return {
type: 'url', type: 'url',
content: url, content: url,

View file

@ -52,7 +52,7 @@ export type TextElement = { type: 'text', content: string }
| TextElementTitle | TextElementTitle
| TextElementUrl | TextElementUrl
| TextElementMotion; | TextElementMotion;
export type TextElementProcessor = (text: string, isBegin: boolean) => TextElement | TextElement[]; export type TextElementProcessor = (text: string, before: string) => TextElement | TextElement[];
export default (source: string): TextElement[] => { export default (source: string): TextElement[] => {
if (source == null || source == '') { if (source == null || source == '') {
@ -68,12 +68,10 @@ export default (source: string): TextElement[] => {
} }
} }
let i = 0;
// パース // パース
while (source != '') { while (source != '') {
const parsed = elements.some(el => { const parsed = elements.some(el => {
let _tokens = el(source, i == 0); let _tokens = el(source, tokens.map(token => token.content).join(''));
if (_tokens) { if (_tokens) {
if (!Array.isArray(_tokens)) { if (!Array.isArray(_tokens)) {
_tokens = [_tokens]; _tokens = [_tokens];
@ -91,8 +89,6 @@ export default (source: string): TextElement[] => {
content: source[0] content: source[0]
}); });
} }
i++;
} }
const combineText = (es: TextElement[]): TextElement => const combineText = (es: TextElement[]): TextElement =>

View file

@ -82,7 +82,7 @@ describe('Text', () => {
{ type: 'text', content: ' お腹ペコい' } { type: 'text', content: ' お腹ペコい' }
], tokens); ], tokens);
}); });
/*
it('ignore', () => { it('ignore', () => {
const tokens = analyze('idolm@ster'); const tokens = analyze('idolm@ster');
assert.deepEqual([ assert.deepEqual([
@ -91,20 +91,19 @@ describe('Text', () => {
const tokens2 = analyze('@a\n@b\n@c'); const tokens2 = analyze('@a\n@b\n@c');
assert.deepEqual([ assert.deepEqual([
{ type: 'mention', content: '@a', username: 'a', host: null }, { type: 'mention', content: '@a', canonical: '@a', username: 'a', host: null },
{ type: 'text', content: '\n' }, { type: 'text', content: '\n' },
{ type: 'mention', content: '@b', username: 'b', host: null }, { type: 'mention', content: '@b', canonical: '@b', username: 'b', host: null },
{ type: 'text', content: '\n' }, { type: 'text', content: '\n' },
{ type: 'mention', content: '@c', username: 'c', host: null } { type: 'mention', content: '@c', canonical: '@c', username: 'c', host: null }
], tokens2); ], tokens2);
const tokens3 = analyze('**x**@a'); const tokens3 = analyze('**x**@a');
assert.deepEqual([ assert.deepEqual([
{ type: 'bold', content: '**x**', bold: 'x' }, { type: 'bold', content: '**x**', bold: 'x' },
{ type: 'mention', content: '@a', username: 'a', host: null } { type: 'mention', content: '@a', canonical: '@a', username: 'a', host: null }
], tokens3); ], tokens3);
}); });
*/
}); });
it('hashtag', () => { it('hashtag', () => {
@ -159,38 +158,68 @@ describe('Text', () => {
], tokens5); ], tokens5);
}); });
it('url', () => { describe('url', () => {
const tokens1 = analyze('https://example.com'); it('simple', () => {
assert.deepEqual([{ const tokens = analyze('https://example.com');
type: 'url', assert.deepEqual([{
content: 'https://example.com', type: 'url',
url: 'https://example.com' content: 'https://example.com',
}], tokens1); url: 'https://example.com'
}], tokens);
});
const tokens2 = analyze('https://example.com.'); it('ignore trailing dot', () => {
assert.deepEqual([{ const tokens = analyze('https://example.com.');
type: 'url', assert.deepEqual([{
content: 'https://example.com', type: 'url',
url: 'https://example.com' content: 'https://example.com',
}, { url: 'https://example.com'
type: 'text', content: '.' }, {
}], tokens2); type: 'text', content: '.'
}], tokens);
});
const tokens3 = analyze('https://example.com/foo?bar=a,b'); it('with comma', () => {
assert.deepEqual([{ const tokens = analyze('https://example.com/foo?bar=a,b');
type: 'url', assert.deepEqual([{
content: 'https://example.com/foo?bar=a,b', type: 'url',
url: 'https://example.com/foo?bar=a,b' content: 'https://example.com/foo?bar=a,b',
}], tokens3); url: 'https://example.com/foo?bar=a,b'
}], tokens);
});
const tokens4 = analyze('https://example.com/foo, bar'); it('ignore trailing comma', () => {
assert.deepEqual([{ const tokens = analyze('https://example.com/foo, bar');
type: 'url', assert.deepEqual([{
content: 'https://example.com/foo', type: 'url',
url: 'https://example.com/foo' content: 'https://example.com/foo',
}, { url: 'https://example.com/foo'
type: 'text', content: ', bar' }, {
}], tokens4); type: 'text', content: ', bar'
}], tokens);
});
it('with brackets', () => {
const tokens = analyze('https://example.com/foo(bar)');
assert.deepEqual([{
type: 'url',
content: 'https://example.com/foo(bar)',
url: 'https://example.com/foo(bar)'
}], tokens);
});
it('ignore parent brackets', () => {
const tokens = analyze('(https://example.com/foo)');
assert.deepEqual([{
type: 'text', content: '('
}, {
type: 'url',
content: 'https://example.com/foo',
url: 'https://example.com/foo'
}, {
type: 'text', content: ')'
}], tokens);
});
}); });
it('link', () => { it('link', () => {