diff --git a/app/lib/formatter.rb b/app/lib/formatter.rb
index c771dcaaa..2c5674869 100644
--- a/app/lib/formatter.rb
+++ b/app/lib/formatter.rb
@@ -245,9 +245,9 @@ class Formatter
end
standard = Extractor.extract_entities_with_indices(text, options)
- xmpp = Extractor.extract_xmpp_uris_with_indices(text, options)
+ extra = Extractor.extract_extra_uris_with_indices(text, options)
- Extractor.remove_overlapping_entities(special + standard + xmpp)
+ Extractor.remove_overlapping_entities(special + standard + extra)
end
def link_to_url(entity, options = {})
diff --git a/app/lib/sanitize_config.rb b/app/lib/sanitize_config.rb
index e2480376e..a82411127 100644
--- a/app/lib/sanitize_config.rb
+++ b/app/lib/sanitize_config.rb
@@ -2,7 +2,7 @@
class Sanitize
module Config
- HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', 'xmpp', :relative].freeze
+ HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', 'xmpp', 'magnet', :relative].freeze
CLASS_WHITELIST_TRANSFORMER = lambda do |env|
node = env[:node]
diff --git a/config/initializers/twitter_regex.rb b/config/initializers/twitter_regex.rb
index 87815d458..f84f7c0cb 100644
--- a/config/initializers/twitter_regex.rb
+++ b/config/initializers/twitter_regex.rb
@@ -47,32 +47,39 @@ module Twitter
#{REGEXEN[:validate_url_pct_encoded]}|
#{REGEXEN[:validate_url_sub_delims]}
)/iox
- REGEXEN[:valid_xmpp_uri] = %r{
- ( # $1 total match
- (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
- ( # $3 URL
- ((?:xmpp):) # $4 Protocol
- (//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)? # $5 Authority (optional)
- (#{REGEXEN[:validate_nodeid]}+@)? # $6 Username in path (optional)
- (#{REGEXEN[:valid_domain]}) # $7 Domain in path
- (/#{REGEXEN[:validate_resid]}+)? # $8 Resource in path (optional)
- (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $9 Query String
+ REGEXEN[:xmpp_uri] = %r{
+ (xmpp:) # Protocol
+ (//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)? # Authority (optional)
+ (#{REGEXEN[:validate_nodeid]}+@)? # Username in path (optional)
+ (#{REGEXEN[:valid_domain]}) # Domain in path
+ (/#{REGEXEN[:validate_resid]}+)? # Resource in path (optional)
+ (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # Query String
+ }iox
+ REGEXEN[:magnet_uri] = %r{
+ (magnet:) # Protocol
+ (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]}) # Query String
+ }iox
+ REGEXEN[:valid_extended_uri] = %r{
+ ( # $1 total match
+ (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
+ ( # $3 URL
+ (#{REGEXEN[:xmpp_uri]}) | (#{REGEXEN[:magnet_uri]})
)
)
}iox
end
module Extractor
- # Extracts a list of all XMPP URIs included in the Tweet text along
+ # Extracts a list of all XMPP and magnet URIs included in the Toot text along
# with the indices. If the text is nil or contains no
- # XMPP URIs an empty array will be returned.
+ # XMPP or magnet URIs an empty array will be returned.
#
- # If a block is given then it will be called for each XMPP URI.
+ # If a block is given then it will be called for each XMPP or magnet URI.
- def extract_xmpp_uris_with_indices(text, options = {}) # :yields: uri, start, end
+ def extract_extra_uris_with_indices(text, options = {}) # :yields: uri, start, end
return [] unless text && text.index(":")
urls = []
- text.to_s.scan(Twitter::Regex[:valid_xmpp_uri]) do
+ text.to_s.scan(Twitter::Regex[:valid_extended_uri]) do
valid_uri_match_data = $~
start_position = valid_uri_match_data.char_begin(3)
diff --git a/spec/lib/formatter_spec.rb b/spec/lib/formatter_spec.rb
index 83be0a588..633d59c2a 100644
--- a/spec/lib/formatter_spec.rb
+++ b/spec/lib/formatter_spec.rb
@@ -258,6 +258,14 @@ RSpec.describe Formatter do
is_expected.to include 'href="xmpp:muc@instance.com?join"'
end
end
+
+ context 'given text containing a magnet: URI' do
+ let(:text) { 'wikipedia gives this example of a magnet uri: magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a' }
+
+ it 'matches the full URI' do
+ is_expected.to include 'href="magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a"'
+ end
+ end
end
describe '#format_spoiler' do