From 8015fd76003419bd3ccca585ffe4416312800fe0 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Sat, 10 Jun 2017 15:06:50 +0200 Subject: [PATCH] Improve RTL detection (#3682) - Use plaintext - Strip out URLs - Strip out mentions - Strip out hashtags - Strip out whitespace from "overall" count - Consistent between JS and Ruby --- app/helpers/stream_entries_helper.rb | 19 ++++++++++++++++++- .../mastodon/components/status_content.js | 2 +- app/javascript/mastodon/rtl.js | 6 +++++- .../stream_entries/_detailed_status.html.haml | 2 +- .../stream_entries/_simple_status.html.haml | 2 +- 5 files changed, 26 insertions(+), 5 deletions(-) diff --git a/app/helpers/stream_entries_helper.rb b/app/helpers/stream_entries_helper.rb index 275762c87..a17b02128 100644 --- a/app/helpers/stream_entries_helper.rb +++ b/app/helpers/stream_entries_helper.rb @@ -47,11 +47,16 @@ module StreamEntriesHelper end end + def rtl_status?(status) + status.local? ? rtl?(status.text) : rtl?(strip_tags(status.text)) + end + def rtl?(text) + text = simplified_text(text) rtl_characters = /[\p{Hebrew}|\p{Arabic}|\p{Syriac}|\p{Thaana}|\p{Nko}]+/m.match(text) if rtl_characters.present? - total_size = text.strip.size.to_f + total_size = text.size.to_f rtl_size(rtl_characters.to_a) / total_size > 0.3 else false @@ -60,6 +65,18 @@ module StreamEntriesHelper private + def simplified_text(text) + text.dup.tap do |new_text| + URI.extract(new_text).each do |url| + new_text.gsub!(url, '') + end + + new_text.gsub!(Account::MENTION_RE, '') + new_text.gsub!(Tag::HASHTAG_RE, '') + new_text.gsub!(/\s+/, '') + end + end + def rtl_size(characters) characters.reduce(0) { |acc, elem| acc + elem.size }.to_f end diff --git a/app/javascript/mastodon/components/status_content.js b/app/javascript/mastodon/components/status_content.js index 06e25b36a..d22854288 100644 --- a/app/javascript/mastodon/components/status_content.js +++ b/app/javascript/mastodon/components/status_content.js @@ -100,7 +100,7 @@ class StatusContent extends React.PureComponent { const spoilerContent = { __html: emojify(escapeTextContentForBrowser(status.get('spoiler_text', ''))) }; const directionStyle = { direction: 'ltr' }; - if (isRtl(status.get('content'))) { + if (isRtl(status.get('search_index'))) { directionStyle.direction = 'rtl'; } diff --git a/app/javascript/mastodon/rtl.js b/app/javascript/mastodon/rtl.js index 8f14bb338..00870a15d 100644 --- a/app/javascript/mastodon/rtl.js +++ b/app/javascript/mastodon/rtl.js @@ -17,11 +17,15 @@ export function isRtl(text) { return false; } + text = text.replace(/(?:^|[^\/\w])@([a-z0-9_]+(@[a-z0-9\.\-]+)?)/ig, ''); + text = text.replace(/(?:^|[^\/\w])#([\S]+)/ig, ''); + text = text.replace(/\s+/g, ''); + const matches = text.match(rtlChars); if (!matches) { return false; } - return matches.length / text.trim().length > 0.3; + return matches.length / text.length > 0.3; }; diff --git a/app/views/stream_entries/_detailed_status.html.haml b/app/views/stream_entries/_detailed_status.html.haml index a3e2f766e..ef60b9925 100644 --- a/app/views/stream_entries/_detailed_status.html.haml +++ b/app/views/stream_entries/_detailed_status.html.haml @@ -12,7 +12,7 @@ %p{ style: 'margin-bottom: 0' }< %span.p-summary> #{status.spoiler_text}  %a.status__content__spoiler-link{ href: '#' }= t('statuses.show_more') - .e-content{ lang: status.language, style: "display: #{status.spoiler_text? ? 'none' : 'block'}; direction: #{rtl?(status.content) ? 'rtl' : 'ltr'}" }= Formatter.instance.format(status) + .e-content{ lang: status.language, style: "display: #{status.spoiler_text? ? 'none' : 'block'}; direction: #{rtl_status?(status) ? 'rtl' : 'ltr'}" }= Formatter.instance.format(status) - unless status.media_attachments.empty? - if status.media_attachments.first.video? diff --git a/app/views/stream_entries/_simple_status.html.haml b/app/views/stream_entries/_simple_status.html.haml index 3fa347f74..db4e30fda 100644 --- a/app/views/stream_entries/_simple_status.html.haml +++ b/app/views/stream_entries/_simple_status.html.haml @@ -18,7 +18,7 @@ %p{ style: 'margin-bottom: 0' }< %span.p-summary> #{status.spoiler_text}  %a.status__content__spoiler-link{ href: '#' }= t('statuses.show_more') - .e-content{ lang: status.language, style: "display: #{status.spoiler_text? ? 'none' : 'block'}; direction: #{rtl?(status.content) ? 'rtl' : 'ltr'}" }= Formatter.instance.format(status) + .e-content{ lang: status.language, style: "display: #{status.spoiler_text? ? 'none' : 'block'}; direction: #{rtl_status?(status) ? 'rtl' : 'ltr'}" }= Formatter.instance.format(status) - unless status.media_attachments.empty? .status__attachments