From 0f39b1087f052148f8f83aac6c24e99b24413767 Mon Sep 17 00:00:00 2001 From: Daniel Schwarz Date: Fri, 22 Sep 2023 21:32:19 -0400 Subject: [PATCH] Support to display a limited set of HTML tags HTML tag support is aligned with Mastodon 4.2 supported tags. This code introduces a soft dependency on the urwidgets library. If urwidgets is not available, HTML tags are still supported, but hyperlinks are not underlined using the OCS 8 terminal feature (on supported terminals). --- .flake8 | 1 + toot/tui/app.py | 17 -- toot/tui/constants.py | 23 ++ toot/tui/overlays.py | 21 +- toot/tui/poll.py | 12 +- toot/tui/richtext.py | 457 ++++++++++++++++++++++++++++++ toot/tui/stubs/stub_hyperlink.py | 30 ++ toot/tui/stubs/stub_text_embed.py | 29 ++ toot/tui/stubs/urwidgets.py | 8 + toot/tui/timeline.py | 40 ++- toot/tui/urwidgets.py | 8 + toot/tui/utils.py | 15 - toot/utils/__init__.py | 12 + 13 files changed, 619 insertions(+), 54 deletions(-) create mode 100644 toot/tui/richtext.py create mode 100644 toot/tui/stubs/stub_hyperlink.py create mode 100644 toot/tui/stubs/stub_text_embed.py create mode 100644 toot/tui/stubs/urwidgets.py create mode 100644 toot/tui/urwidgets.py diff --git a/.flake8 b/.flake8 index 6efbecd..cc916ad 100644 --- a/.flake8 +++ b/.flake8 @@ -1,4 +1,5 @@ [flake8] exclude=build,tests,tmp,venv,toot/tui/scroll.py ignore=E128,W503 +per-file-ignores=toot/tui/stubs/urwidgets.py:F401 max-line-length=120 diff --git a/toot/tui/app.py b/toot/tui/app.py index 9d78b12..6909d79 100644 --- a/toot/tui/app.py +++ b/toot/tui/app.py @@ -143,7 +143,6 @@ class TUI(urwid.Frame): def run(self): self.loop.set_alarm_in(0, lambda *args: self.async_load_instance()) self.loop.set_alarm_in(0, lambda *args: self.async_load_followed_accounts()) - self.loop.set_alarm_in(0, lambda *args: self.async_load_followed_tags()) self.loop.set_alarm_in(0, lambda *args: self.async_load_timeline( is_initial=True, timeline_name="home")) self.loop.run() @@ -339,22 +338,6 @@ class TUI(urwid.Frame): 
self.run_in_thread(_load_accounts, done_callback=_done_accounts) - def async_load_followed_tags(self): - def _load_tag_list(): - try: - return api.followed_tags(self.app, self.user) - except ApiError: - # not supported by all Mastodon servers so fail silently if necessary - return [] - - def _done_tag_list(tags): - if len(tags) > 0: - self.followed_tags = [t["name"] for t in tags] - else: - self.followed_tags = [] - - self.run_in_thread(_load_tag_list, done_callback=_done_tag_list) - def refresh_footer(self, timeline): """Show status details in footer.""" status, index, count = timeline.get_focused_status_with_counts() diff --git a/toot/tui/constants.py b/toot/tui/constants.py index 91bb3b7..f51ae61 100644 --- a/toot/tui/constants.py +++ b/toot/tui/constants.py @@ -57,6 +57,29 @@ PALETTE = [ ('dim', 'dark gray', ''), ('highlight', 'yellow', ''), ('success', 'dark green', ''), + + # HTML tag styling + ('a', ',italics', '', 'italics'), + # em tag is mapped to i + ('i', ',italics', '', 'italics'), + # strong tag is mapped to b + ('b', ',bold', '', 'bold'), + # special case for bold + italic nested tags + ('bi', ',bold,italics', '', ',bold,italics'), + ('u', ',underline', '', ',underline'), + ('del', ',strikethrough', '', ',strikethrough'), + ('code', 'light gray, standout', '', ',standout'), + ('pre', 'light gray, standout', '', ',standout'), + ('blockquote', 'light gray', '', ''), + ('h1', ',bold', '', ',bold'), + ('h2', ',bold', '', ',bold'), + ('h3', ',bold', '', ',bold'), + ('h4', ',bold', '', ',bold'), + ('h5', ',bold', '', ',bold'), + ('h6', ',bold', '', ',bold'), + ('class_mention_hashtag', 'light cyan', '', ''), + ('class_hashtag', 'light cyan', '', ''), + ] VISIBILITY_OPTIONS = [ diff --git a/toot/tui/overlays.py b/toot/tui/overlays.py index 75be80c..530921a 100644 --- a/toot/tui/overlays.py +++ b/toot/tui/overlays.py @@ -4,10 +4,10 @@ import urwid import webbrowser from toot import __version__ -from toot.utils import format_content -from .utils import 
highlight_hashtags, highlight_keys -from .widgets import Button, EditBox, SelectableText from toot import api +from toot.tui.utils import highlight_keys +from toot.tui.widgets import Button, EditBox, SelectableText +from toot.tui.richtext import ContentParser class StatusSource(urwid.Padding): @@ -255,6 +255,8 @@ class Account(urwid.ListBox): super().__init__(walker) def generate_contents(self, account, relationship=None, last_action=None): + parser = ContentParser() + if self.last_action and not self.last_action.startswith("Confirm"): yield Button(f"Confirm {self.last_action}", on_press=take_action, user_data=self) yield Button("Cancel", on_press=cancel_action, user_data=self) @@ -279,8 +281,10 @@ class Account(urwid.ListBox): if account["note"]: yield urwid.Divider() - for line in format_content(account["note"]): - yield urwid.Text(highlight_hashtags(line, followed_tags=set())) + + widgetlist = parser.html_to_widgets(account["note"]) + for line in widgetlist: + yield (line) yield urwid.Divider() yield urwid.Text(["ID: ", ("highlight", f"{account['id']}")]) @@ -312,8 +316,11 @@ class Account(urwid.ListBox): name = field["name"].title() yield urwid.Divider() yield urwid.Text([("bold", f"{name.rstrip(':')}"), ":"]) - for line in format_content(field["value"]): - yield urwid.Text(highlight_hashtags(line, followed_tags=set())) + + widgetlist = parser.html_to_widgets(field["value"]) + for line in widgetlist: + yield (line) + if field["verified_at"]: yield urwid.Text(("success", "✓ Verified")) diff --git a/toot/tui/poll.py b/toot/tui/poll.py index 0c3ff46..c92cc07 100644 --- a/toot/tui/poll.py +++ b/toot/tui/poll.py @@ -2,11 +2,9 @@ import urwid from toot import api from toot.exceptions import ApiError -from toot.utils import format_content from toot.utils.datetime import parse_datetime - -from .utils import highlight_hashtags from .widgets import Button, CheckBox, RadioButton +from .richtext import ContentParser class Poll(urwid.ListBox): @@ -87,8 +85,12 @@ class 
Poll(urwid.ListBox): def generate_contents(self, status): yield urwid.Divider() - for line in format_content(status.data["content"]): - yield urwid.Text(highlight_hashtags(line, set())) + + parser = ContentParser() + widgetlist = parser.html_to_widgets(status.data["content"]) + + for line in widgetlist: + yield (line) yield urwid.Divider() yield self.build_linebox(self.generate_poll_detail()) diff --git a/toot/tui/richtext.py b/toot/tui/richtext.py new file mode 100644 index 0000000..e74dff8 --- /dev/null +++ b/toot/tui/richtext.py @@ -0,0 +1,457 @@ +""" +richtext +""" +from typing import List, Tuple +import re +import urwid +import unicodedata +from .constants import PALETTE +from bs4 import BeautifulSoup +from bs4.element import NavigableString, Tag +from .stubs.urwidgets import TextEmbed, Hyperlink, parse_text, has_urwidgets +from urwid.util import decompose_tagmarkup +from toot.utils import urlencode_url + + +class ContentParser: + def __init__(self): + self.palette_names = [] + for p in PALETTE: + self.palette_names.append(p[0]) + + """Parse a limited subset of HTML and create urwid widgets.""" + + def html_to_widgets(self, html, recovery_attempt=False) -> List[urwid.Widget]: + """Convert html to urwid widgets""" + widgets: List[urwid.Widget] = [] + html = unicodedata.normalize("NFKC", html) + soup = BeautifulSoup(html.replace("'", "'"), "html.parser") + first_tag = True + for e in soup.body or soup: + if isinstance(e, NavigableString): + if first_tag and not recovery_attempt: + # if our first "tag" is a navigable string + # the HTML is out of spec, doesn't start with a tag, + # we see this in content from Pixelfed servers. + # attempt a fix by wrapping the HTML with

+ return self.html_to_widgets(f"

{html}

", recovery_attempt=True) + else: + continue + else: + name = e.name + # if our HTML starts with a tag, but not a block tag + # the HTML is out of spec. Attempt a fix by wrapping the + # HTML with

+ if ( + first_tag + and not recovery_attempt + and name + not in ( + "p", + "pre", + "li", + "blockquote", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + ) # NOTE: update this list if Mastodon starts supporting more block tags + ): + return self.html_to_widgets(f"

{html}

", recovery_attempt=True) + + # First, look for a custom tag handler method in this class + # If that fails, fall back to inline_tag_to_text handler + method = getattr(self, "_" + name, self.inline_tag_to_text) + markup = method(e) # either returns a Widget, or plain text + first_tag = False + + if not isinstance(markup, urwid.Widget): + # plaintext, so create a padded text widget + txt = self.text_to_widget("", markup) + markup = urwid.Padding( + txt, + align="left", + width=("relative", 100), + min_width=None, + ) + widgets.append(markup) + # separate top level widgets with a blank line + widgets.append(urwid.Divider(" ")) + return widgets[:-1] # but suppress the last blank line + + def inline_tag_to_text(self, tag) -> Tuple: + """Convert html tag to plain text with tag as attributes recursively""" + markups = self.process_inline_tag_children(tag) + if not markups: + return (tag.name, "") + return (tag.name, markups) + + def process_inline_tag_children(self, tag) -> List: + """Recursively retrieve all children + and convert to a list of markup text""" + markups = [] + for child in tag.children: + if isinstance(child, Tag): + method = getattr(self, "_" + child.name, self.inline_tag_to_text) + markup = method(child) + markups.append(markup) + else: + markups.append(child) + return markups + + def text_to_widget(self, attr, markup) -> urwid.Widget: + if not has_urwidgets: + return urwid.Text((attr, markup)) + + TRANSFORM = { + # convert http[s] URLs to Hyperlink widgets for nesting in a TextEmbed widget + re.compile(r"(^.+)\x03(.+$)"): lambda g: ( + len(g[1]), + urwid.Filler(Hyperlink(g[2], anchor_attr, g[1])), + ), + } + markup_list = [] + + for run in markup: + if isinstance(run, tuple): + txt, attr_list = decompose_tagmarkup(run) + # find anchor titles with an ETX separator followed by href + m = re.match(r"(^.+)\x03(.+$)", txt) + if m: + anchor_attr = self.get_best_anchor_attr(attr_list) + markup_list.append( + parse_text( + txt, + TRANSFORM, + lambda pattern, 
groups, span: TRANSFORM[pattern](groups), + ) + ) + else: + markup_list.append(run) + else: + markup_list.append(run) + + return TextEmbed(markup_list) + + def process_block_tag_children(self, tag) -> List[urwid.Widget]: + """Recursively retrieve all children + and convert to a list of widgets + any inline tags containing text will be + converted to Text widgets""" + + pre_widget_markups = [] + post_widget_markups = [] + child_widgets = [] + found_nested_widget = False + + for child in tag.children: + if isinstance(child, Tag): + # child is a nested tag; process using custom method + # or default to inline_tag_to_text + method = getattr(self, "_" + child.name, self.inline_tag_to_text) + result = method(child) + if isinstance(result, urwid.Widget): + found_nested_widget = True + child_widgets.append(result) + else: + if not found_nested_widget: + pre_widget_markups.append(result) + else: + post_widget_markups.append(result) + else: + # child is text; append to the appropriate markup list + if not found_nested_widget: + pre_widget_markups.append(child) + else: + post_widget_markups.append(child) + + widget_list = [] + if len(pre_widget_markups): + widget_list.append(self.text_to_widget(tag.name, pre_widget_markups)) + + if len(child_widgets): + widget_list += child_widgets + + if len(post_widget_markups): + widget_list.append(self.text_to_widget(tag.name, post_widget_markups)) + + return widget_list + + def get_urwid_attr_name(self, tag) -> str: + """Get the class name and translate to a + name suitable for use as an urwid + text attribute name""" + + if "class" in tag.attrs: + clss = tag.attrs["class"] + if len(clss) > 0: + style_name = "class_" + "_".join(clss) + # return the class name, only if we + # find it as a defined palette name + if style_name in self.palette_names: + return style_name + + # fallback to returning the tag name + return tag.name + + # Tag handlers start here. + # Tags not explicitly listed are "supported" by + # rendering as text. 
+ # Inline tags return a list of marked up text for urwid.Text + # Block tags return urwid.Widget + + def basic_block_tag_handler(self, tag) -> urwid.Widget: + """default for block tags that need no special treatment""" + return urwid.Pile(self.process_block_tag_children(tag)) + + def get_best_anchor_attr(self, attrib_list) -> str: + if not attrib_list: + return "" + flat_al = list(flatten(attrib_list)) + + for a in flat_al[0]: + # ref: https://docs.joinmastodon.org/spec/activitypub/ + # these are the class names (translated to attrib names) + # that we can support for display + + try: + if a[0] in ["class_hashtag", "class_mention_hashtag", "class_mention"]: + return a[0] + except KeyError: + continue + + return "a" + + def _a(self, tag) -> Tuple: + """anchor tag handler""" + + markups = self.process_inline_tag_children(tag) + if not markups: + return (tag.name, "") + + href = tag.attrs["href"] + title, attrib_list = decompose_tagmarkup(markups) + if not attrib_list: + attrib_list = [tag] + if href and has_urwidgets: + # only if we have urwidgets loaded for OCS 8 hyperlinks: + # urlencode the path and query portions of the URL + href = urlencode_url(href) + # use ASCII ETX (end of record) as a + # delimiter between the title and the HREF + title += f"\x03{href}" + + attr = self.get_best_anchor_attr(attrib_list) + + if attr == "a": + # didn't find an attribute to use + # in the child markup, so let's + # try the anchor tag's own attributes + + attr = self.get_urwid_attr_name(tag) + + # hashtag anchors have a class of "mention hashtag" + # or "hashtag" + # we'll return style "class_mention_hashtag" + # or "class_hashtag" + # in that case; see corresponding palette entry + # in constants.py controlling hashtag highlighting + + return (attr, title) + + def _blockquote(self, tag) -> urwid.Widget: + widget_list = self.process_block_tag_children(tag) + blockquote_widget = urwid.LineBox( + urwid.Padding( + urwid.Pile(widget_list), + align="left", + width=("relative", 100), 
+ min_width=None, + left=1, + right=1, + ), + tlcorner="", + tline="", + lline="│", + trcorner="", + blcorner="", + rline="", + bline="", + brcorner="", + ) + return urwid.Pile([urwid.AttrMap(blockquote_widget, "blockquote")]) + + def _br(self, tag) -> Tuple: + return ("br", "\n") + + def _em(self, tag) -> Tuple: + # to simplify the number of palette entries + # translate EM to I (italic) + markups = self.process_inline_tag_children(tag) + if not markups: + return ("i", "") + + # special case processing for bold and italic + for parent in tag.parents: + if parent.name == "b" or parent.name == "strong": + return ("bi", markups) + + return ("i", markups) + + def _ol(self, tag) -> urwid.Widget: + """ordered list tag handler""" + + widgets = [] + list_item_num = 1 + increment = -1 if tag.has_attr("reversed") else 1 + + # get ol start= attribute if present + if tag.has_attr("start") and len(tag.attrs["start"]) > 0: + try: + list_item_num = int(tag.attrs["start"]) + except ValueError: + pass + + for li in tag.find_all("li", recursive=False): + method = getattr(self, "_li", self.inline_tag_to_text) + markup = method(li) + + # li value= attribute will change the item number + # it also overrides any ol start= attribute + + if li.has_attr("value") and len(li.attrs["value"]) > 0: + try: + list_item_num = int(li.attrs["value"]) + except ValueError: + pass + + if not isinstance(markup, urwid.Widget): + txt = self.text_to_widget("li", [str(list_item_num), ". ", markup]) + # 1. foo, 2. bar, etc. + widgets.append(txt) + else: + txt = self.text_to_widget("li", [str(list_item_num), ". "]) + columns = urwid.Columns( + [txt, ("weight", 9999, markup)], dividechars=1, min_width=3 + ) + widgets.append(columns) + + list_item_num += increment + + return urwid.Pile(widgets) + + def _pre(self, tag) -> urwid.Widget: + #
<pre> tag spec says that text should not wrap,
+        # but horizontal screen space is at a premium
+        # and we have no horizontal scroll bar, so allow
+        # wrapping.
+
+        widget_list = [urwid.Divider(" ")]
+        widget_list += self.process_block_tag_children(tag)
+
+        pre_widget = urwid.Padding(
+            urwid.Pile(widget_list),
+            align="left",
+            width=("relative", 100),
+            min_width=None,
+            left=1,
+            right=1,
+        )
+        return urwid.Pile([urwid.AttrMap(pre_widget, "pre")])
+
+    def _span(self, tag) -> Tuple:
+        markups = self.process_inline_tag_children(tag)
+
+        if not markups:
+            return (tag.name, "")
+
+        # Attribute name: a span with a class that get_urwid_attr_name() maps
+        # to a palette entry uses that entry; any other span inherits the
+        # attribute name derived from its parent tag
+
+        if "class" in tag.attrs:
+            # uncomment the following code to hide all HTML marked
+            # invisible (generally, the http:// prefix of URLs)
+            # could be a user preference, it's only advisable if
+            # the terminal supports OSC 8 hyperlinks (and that's not
+            # automatically detectable)
+
+            # if "invisible" in tag.attrs["class"]:
+            #     return (tag.name, "")
+
+            style_name = self.get_urwid_attr_name(tag)
+
+            if style_name != "span":
+                # anything other than the bare tag name is a class-derived name
+                return (style_name, markups)
+
+        if tag.parent:
+            return (self.get_urwid_attr_name(tag.parent), markups)
+        else:
+            # fallback: no parent to inherit from, use the plain "span" name
+            return ("span", markups)
+
+    def _strong(self, tag) -> Tuple:
+        # STRONG is reported under the "b" (bold) attribute name, so the
+        # palette needs a single bold entry rather than one per tag
+        markups = self.process_inline_tag_children(tag)
+        if not markups:
+            return ("b", "")
+
+        # bold with an I or EM ancestor uses the combined "bi" attribute name
+        for parent in tag.parents:
+            if parent.name == "i" or parent.name == "em":
+                return ("bi", markups)
+
+        return ("b", markups)
+
+    def _ul(self, tag) -> urwid.Widget:
+        """unordered list tag handler: a Pile with one bulleted row per direct <li> child"""
+
+        widgets = []
+
+        for li in tag.find_all("li", recursive=False):
+            method = getattr(self, "_li", self.inline_tag_to_text)
+            markup = method(li)
+
+            if not isinstance(markup, urwid.Widget):
+                txt = self.text_to_widget("li", ["\N{bullet} ", markup])
+                # plain markup: bullet and text share one widget (* foo, * bar, etc.)
+                widgets.append(txt)
+            else:
+                txt = self.text_to_widget("li", ["\N{bullet} "])
+                columns = urwid.Columns(
+                    [txt, ("weight", 9999, markup)], dividechars=1, min_width=3
+                )
+                widgets.append(columns)
+
+        return urwid.Pile(widgets)
+
+    # Handler aliases: these tags reuse an existing handler unchanged.
+    # _b and _i get the "b"/"i" names returned by _strong and _em; the
+    # block handler passes each tag's own name on as the text attribute
+
+    _b = _strong
+
+    _div = basic_block_tag_handler
+
+    _i = _em
+
+    _li = basic_block_tag_handler
+
+    # Glitch-soc and Pleroma allow <h1>...<h6> in content
+    # Mastodon (PR #23913) does not; header tags are converted to <p><strong></strong></p>
+ + _h1 = _h2 = _h3 = _h4 = _h5 = _h6 = basic_block_tag_handler + + _p = basic_block_tag_handler + + +def flatten(data): + if isinstance(data, tuple): + for x in data: + yield from flatten(x) + else: + yield data diff --git a/toot/tui/stubs/stub_hyperlink.py b/toot/tui/stubs/stub_hyperlink.py new file mode 100644 index 0000000..a2831ba --- /dev/null +++ b/toot/tui/stubs/stub_hyperlink.py @@ -0,0 +1,30 @@ +__all__ = ("Hyperlink",) + +import urwid + + +class Hyperlink(urwid.WidgetWrap): + def __init__( + self, + uri, + attr, + text, + ): + pass + + def render(self, size, focus): + return None + + +class HyperlinkCanvas(urwid.Canvas): + def __init__(self, uri: str, text_canv: urwid.TextCanvas): + pass + + def cols(self): + return 0 + + def content(self, *args, **kwargs): + yield [None] + + def rows(self): + return 0 diff --git a/toot/tui/stubs/stub_text_embed.py b/toot/tui/stubs/stub_text_embed.py new file mode 100644 index 0000000..bf587d2 --- /dev/null +++ b/toot/tui/stubs/stub_text_embed.py @@ -0,0 +1,29 @@ +__all__ = ("parse_text", "TextEmbed") + +import urwid + + +class TextEmbed(urwid.Text): + def get_text( + self, + ): + return None + + def render(self, size, focus): + return None + + def set_text(self, markup): + pass + + def set_wrap_mode(self, mode): + pass + + +def parse_text( + text, + patterns, + repl, + *repl_args, + **repl_kwargs, +): + return None diff --git a/toot/tui/stubs/urwidgets.py b/toot/tui/stubs/urwidgets.py new file mode 100644 index 0000000..92737d3 --- /dev/null +++ b/toot/tui/stubs/urwidgets.py @@ -0,0 +1,8 @@ +# If urwidgets is loaded use it; otherwise use our stubs +try: + from urwidgets import Hyperlink, TextEmbed, parse_text + has_urwidgets = True +except ImportError: + from .stub_hyperlink import Hyperlink + from .stub_text_embed import TextEmbed, parse_text + has_urwidgets = False diff --git a/toot/tui/timeline.py b/toot/tui/timeline.py index ddc4a35..1fef40c 100644 --- a/toot/tui/timeline.py +++ b/toot/tui/timeline.py @@ -1,18 +1,21 
@@ import logging +import re import urwid import webbrowser from typing import List, Optional from toot.tui import app -from toot.utils import format_content from toot.utils.datetime import parse_datetime, time_ago from toot.utils.language import language_name -from .entities import Status -from .scroll import Scrollable, ScrollBar -from .utils import highlight_hashtags, highlight_keys -from .widgets import SelectableText, SelectableColumns +from toot.entities import Status +from toot.tui.scroll import Scrollable, ScrollBar +from toot.tui.utils import highlight_keys +from toot.tui.widgets import SelectableText, SelectableColumns +from toot.tui.richtext import ContentParser +from toot.utils import urlencode_url +from toot.tui.stubs.urwidgets import Hyperlink, TextEmbed, parse_text, has_urwidgets logger = logging.getLogger("toot") @@ -310,7 +313,6 @@ class Timeline(urwid.Columns): class StatusDetails(urwid.Pile): def __init__(self, timeline: Timeline, status: Optional[Status]): self.status = status - self.followed_tags = timeline.tui.followed_tags self.followed_accounts = timeline.tui.followed_accounts reblogged_by = status.author if status and status.reblog else None @@ -318,6 +320,20 @@ class StatusDetails(urwid.Pile): if status else ()) return super().__init__(widget_list) + def linkify_content(self, text) -> urwid.Widget: + if not has_urwidgets: + return urwid.Text(("link", text)) + TRANSFORM = { + # convert http[s] URLs to Hyperlink widgets for nesting in a TextEmbed widget + re.compile(r'(https?://[^\s]+)'): + lambda g: (len(g[1]), urwid.Filler(Hyperlink(urlencode_url(g[1]), "link", g[1]))), + } + markup_list = [] + + markup_list.append(parse_text(text, TRANSFORM, + lambda pattern, groups, span: TRANSFORM[pattern](groups))) + return TextEmbed(markup_list, align='left') + def content_generator(self, status, reblogged_by): if reblogged_by: text = "♺ {} boosted".format(reblogged_by.display_name or reblogged_by.username) @@ -340,8 +356,12 @@ class 
StatusDetails(urwid.Pile): yield ("pack", urwid.Text(("content_warning", "Marked as sensitive. Press S to view."))) else: content = status.original.translation if status.original.show_translation else status.data["content"] - for line in format_content(content): - yield ("pack", urwid.Text(highlight_hashtags(line, self.followed_tags))) + + parser = ContentParser() + widgetlist = parser.html_to_widgets(content) + + for line in widgetlist: + yield (line) media = status.data["media_attachments"] if media: @@ -350,7 +370,7 @@ class StatusDetails(urwid.Pile): yield ("pack", urwid.Text([("bold", "Media attachment"), " (", m["type"], ")"])) if m["description"]: yield ("pack", urwid.Text(m["description"])) - yield ("pack", urwid.Text(("link", m["url"]))) + yield ("pack", self.linkify_content(m["url"])) poll = status.original.data.get("poll") if poll: @@ -410,7 +430,7 @@ class StatusDetails(urwid.Pile): if card["description"]: yield urwid.Text(card["description"].strip()) yield urwid.Text("") - yield urwid.Text(("link", card["url"])) + yield self.linkify_content(card["url"]) def poll_generator(self, poll): for idx, option in enumerate(poll["options"]): diff --git a/toot/tui/urwidgets.py b/toot/tui/urwidgets.py new file mode 100644 index 0000000..ee731a8 --- /dev/null +++ b/toot/tui/urwidgets.py @@ -0,0 +1,8 @@ +# If urwidgets is loaded use it; otherwise use our stubs +try: + from urwidgets import Hyperlink, TextEmbed, parse_text # noqa: F401 + has_urwidgets = True +except ImportError: + from .stub_hyperlink import Hyperlink # noqa: F401 + from .stub_text_embed import TextEmbed, parse_text # noqa: F401 + has_urwidgets = False diff --git a/toot/tui/utils.py b/toot/tui/utils.py index 377522b..0ccff9d 100644 --- a/toot/tui/utils.py +++ b/toot/tui/utils.py @@ -35,21 +35,6 @@ def highlight_keys(text, high_attr, low_attr=""): return list(_gen()) -def highlight_hashtags(line, followed_tags, attr="hashtag", followed_attr="hashtag_followed"): - hline = [] - - for p in 
re.split(HASHTAG_PATTERN, line): - if p.startswith("#"): - if p[1:].lower() in (t.lower() for t in followed_tags): - hline.append((followed_attr, p)) - else: - hline.append((attr, p)) - else: - hline.append(p) - - return hline - - def show_media(paths): """ Attempt to open an image viewer to show given media files. diff --git a/toot/utils/__init__.py b/toot/utils/__init__.py index e8103ac..43af373 100644 --- a/toot/utils/__init__.py +++ b/toot/utils/__init__.py @@ -10,6 +10,7 @@ from bs4 import BeautifulSoup from typing import Dict from toot.exceptions import ConsoleError +from urllib.parse import urlparse, urlencode, quote, unquote def str_bool(b): @@ -186,3 +187,14 @@ def _warn_scheme_deprecated(): "instead write:", " toot instance http://unsafehost.com\n" ])) + + +def urlencode_url(url): + parsed_url = urlparse(url) + + # unencode before encoding, to prevent double-urlencoding + encoded_path = quote(unquote(parsed_url.path), safe="-._~()'!*:@,;+&=/") + encoded_query = urlencode({k: quote(unquote(v), safe="-._~()'!*:@,;?/") for k, v in parsed_url.params}) + encoded_url = parsed_url._replace(path=encoded_path, params=encoded_query).geturl() + + return encoded_url