Merge pull request #415 from ihabunek/danschwarz-richtext3

Add support for rich text
2023-11-18 15:40:35 +01:00 · 2023-11-18 15:40:35 +01:00 · 317840b019
commit 317840b019
parent fe8b441b5b 9b9c153531
15 changed files with 605 additions and 57 deletions
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@ -18,7 +18,7 @@ jobs:
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
-          pip install -e .
+          pip install -e .\[richtext\]
          pip install -r requirements-test.txt
      - name: Run tests
        run: |
--- a/requirements.txt
+++ b/requirements.txt
@ -2,4 +2,4 @@ requests>=2.13,<3.0
 beautifulsoup4>=4.5.0,<5.0
 wcwidth>=0.1.7
 urwid>=2.0.0,<3.0
-
+urwidgets>=0.1,<0.2
--- a/setup.py
+++ b/setup.py
@ -31,7 +31,7 @@ setup(
        'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
        'Programming Language :: Python :: 3',
    ],
-    packages=['toot', 'toot.tui', 'toot.utils'],
+    packages=['toot', 'toot.tui', 'toot.tui.richtext', 'toot.utils'],
    python_requires=">=3.7",
    install_requires=[
        "requests>=2.13,<3.0",
@ -40,6 +40,9 @@ setup(
        "urwid>=2.0.0,<3.0",
        "tomlkit>=0.10.0,<1.0"
    ],
+    extras_require={
+        "richtext": ['urwidgets>=0.1,<0.2'],
+    },
    entry_points={
        'console_scripts': [
            'toot=toot.console:main',
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@ -3,6 +3,7 @@ import pytest

 from toot.console import duration
 from toot.wcstring import wc_wrap, trunc, pad, fit_text
+from toot.utils import urlencode_url


 def test_pad():
@ -201,3 +202,8 @@ def test_duration():

    with pytest.raises(ArgumentTypeError):
        duration("banana")
+
+
+def test_urlencode_url():
+    """URLs that are already correctly encoded must be returned unchanged."""
+    already_encoded = (
+        "https://www.example.com",
+        "https://www.example.com/url%20with%20spaces",
+    )
+    for url in already_encoded:
+        assert urlencode_url(url) == url
--- a/tests/tui/test_rich_text.py
+++ b/tests/tui/test_rich_text.py
@ -0,0 +1,45 @@
+from urwid import Divider, Filler, Pile
+from toot.tui.richtext import url_to_widget
+from urwidgets import Hyperlink, TextEmbed
+
+from toot.tui.richtext.richtext import html_to_widgets
+
+
+def test_url_to_widget():
+    """url_to_widget wraps a single Hyperlink (inside a Filler) in a TextEmbed."""
+    url = "http://foo.bar"
+
+    embed_widget = url_to_widget(url)
+    assert isinstance(embed_widget, TextEmbed)
+
+    # exactly one embedded entry is expected: (widget, display width)
+    (entry,) = embed_widget.embedded
+    filler, length = entry
+    assert isinstance(filler, Filler)
+    assert length == len(url)
+
+    link_widget: Hyperlink = filler.base_widget
+    assert isinstance(link_widget, Hyperlink)
+
+    # the URL doubles as both the visible label and the link target
+    assert link_widget.uri == url
+    assert link_widget.text == url
+    assert link_widget.attrib == "link"
+
+
+def test_html_to_widgets():
+    """Two paragraphs render as two Piles separated by a Divider."""
+    html = """
+    <p>foo</p>
+    <p>foo <b>bar</b> <i>baz</i></p>
+    """.strip()
+
+    first, separator, second = html_to_widgets(html)
+
+    expected_types = ((first, Pile), (separator, Divider), (second, Pile))
+    for widget, expected_type in expected_types:
+        assert isinstance(widget, expected_type)
+
+    # plain paragraph: text only, no attributes, no embedded widgets
+    (plain,) = first.widget_list
+    assert plain.text == "foo"
+    assert plain.attrib == []
+    assert plain.embedded == []
+
+    # styled paragraph: attribute runs follow the <b> and <i> tags
+    (styled,) = second.widget_list
+    assert styled.text == "foo bar baz"
+    assert styled.attrib == [(None, 4), ("b", 3), (None, 1), ("i", 3)]
+    assert styled.embedded == []
--- a/toot/output.py
+++ b/toot/output.py
@ -6,7 +6,7 @@ import textwrap
 from functools import lru_cache
 from toot import settings
 from toot.entities import Instance, Notification, Poll, Status
-from toot.utils import get_text, parse_html
+from toot.utils import get_text, html_to_paragraphs
 from toot.wcstring import wc_wrap
 from typing import List
 from wcwidth import wcswidth
@ -321,7 +321,7 @@ def print_status(status: Status, width: int = 80):

 def print_html(text, width=80):
    first = True
-    for paragraph in parse_html(text):
+    for paragraph in html_to_paragraphs(text):
        if not first:
            print_out("")
        for line in paragraph:
--- a/toot/tui/app.py
+++ b/toot/tui/app.py
@ -143,7 +143,6 @@ class TUI(urwid.Frame):
    def run(self):
        self.loop.set_alarm_in(0, lambda *args: self.async_load_instance())
        self.loop.set_alarm_in(0, lambda *args: self.async_load_followed_accounts())
-        self.loop.set_alarm_in(0, lambda *args: self.async_load_followed_tags())
        self.loop.set_alarm_in(0, lambda *args: self.async_load_timeline(
            is_initial=True, timeline_name="home"))
        self.loop.run()
@ -339,22 +338,6 @@ class TUI(urwid.Frame):

        self.run_in_thread(_load_accounts, done_callback=_done_accounts)

-    def async_load_followed_tags(self):
-        def _load_tag_list():
-            try:
-                return api.followed_tags(self.app, self.user)
-            except ApiError:
-                # not supported by all Mastodon servers so fail silently if necessary
-                return []
-
-        def _done_tag_list(tags):
-            if len(tags) > 0:
-                self.followed_tags = [t["name"] for t in tags]
-            else:
-                self.followed_tags = []
-
-        self.run_in_thread(_load_tag_list, done_callback=_done_tag_list)
-
    def refresh_footer(self, timeline):
        """Show status details in footer."""
        status, index, count = timeline.get_focused_status_with_counts()
--- a/toot/tui/constants.py
+++ b/toot/tui/constants.py
@ -57,6 +57,29 @@ PALETTE = [
    ('dim', 'dark gray', ''),
    ('highlight', 'yellow', ''),
    ('success', 'dark green', ''),
+
+    # HTML tag styling
+    ('a', ',italics', '', 'italics'),
+    # em tag is mapped to i
+    ('i', ',italics', '', 'italics'),
+    # strong tag is mapped to b
+    ('b', ',bold', '', 'bold'),
+    # special case for bold + italic nested tags
+    ('bi', ',bold,italics', '', ',bold,italics'),
+    ('u', ',underline', '', ',underline'),
+    ('del', ',strikethrough', '', ',strikethrough'),
+    ('code', 'light gray, standout', '', ',standout'),
+    ('pre', 'light gray, standout', '', ',standout'),
+    ('blockquote', 'light gray', '', ''),
+    ('h1', ',bold', '', ',bold'),
+    ('h2', ',bold', '', ',bold'),
+    ('h3', ',bold', '', ',bold'),
+    ('h4', ',bold', '', ',bold'),
+    ('h5', ',bold', '', ',bold'),
+    ('h6', ',bold', '', ',bold'),
+    ('class_mention_hashtag', 'light cyan', '', ''),
+    ('class_hashtag', 'light cyan', '', ''),
+
 ]

 VISIBILITY_OPTIONS = [
--- a/toot/tui/overlays.py
+++ b/toot/tui/overlays.py
@ -4,10 +4,10 @@ import urwid
 import webbrowser

 from toot import __version__
-from toot.utils import format_content
-from .utils import highlight_hashtags, highlight_keys
-from .widgets import Button, EditBox, SelectableText
 from toot import api
+from toot.tui.utils import highlight_keys
+from toot.tui.widgets import Button, EditBox, SelectableText
+from toot.tui.richtext import html_to_widgets


 class StatusSource(urwid.Padding):
@ -279,8 +279,10 @@ class Account(urwid.ListBox):

        if account["note"]:
            yield urwid.Divider()
-            for line in format_content(account["note"]):
-                yield urwid.Text(highlight_hashtags(line, followed_tags=set()))
+
+            widgetlist = html_to_widgets(account["note"])
+            for line in widgetlist:
+                yield (line)

        yield urwid.Divider()
        yield urwid.Text(["ID: ", ("highlight", f"{account['id']}")])
@ -312,8 +314,11 @@ class Account(urwid.ListBox):
                name = field["name"].title()
                yield urwid.Divider()
                yield urwid.Text([("bold", f"{name.rstrip(':')}"), ":"])
-                for line in format_content(field["value"]):
-                    yield urwid.Text(highlight_hashtags(line, followed_tags=set()))
+
+                widgetlist = html_to_widgets(field["value"])
+                for line in widgetlist:
+                    yield (line)
+
                if field["verified_at"]:
                    yield urwid.Text(("success", "✓ Verified"))

--- a/toot/tui/poll.py
+++ b/toot/tui/poll.py
@ -2,11 +2,9 @@ import urwid

 from toot import api
 from toot.exceptions import ApiError
-from toot.utils import format_content
 from toot.utils.datetime import parse_datetime
-
-from .utils import highlight_hashtags
 from .widgets import Button, CheckBox, RadioButton
+from .richtext import html_to_widgets


 class Poll(urwid.ListBox):
@ -87,8 +85,11 @@ class Poll(urwid.ListBox):

    def generate_contents(self, status):
        yield urwid.Divider()
-        for line in format_content(status.data["content"]):
-            yield urwid.Text(highlight_hashtags(line, set()))
+
+        widgetlist = html_to_widgets(status.data["content"])
+
+        for line in widgetlist:
+            yield (line)

        yield urwid.Divider()
        yield self.build_linebox(self.generate_poll_detail())
--- a/toot/tui/richtext/__init__.py
+++ b/toot/tui/richtext/__init__.py
@ -0,0 +1,18 @@
+import urwid
+
+from toot.tui.utils import highlight_hashtags
+from toot.utils import format_content
+from typing import List
+
+try:
+    from .richtext import html_to_widgets, url_to_widget
+except ImportError:
+    # Fallback if urwidgets are not available: provide plain-text
+    # replacements with the same names so callers need no special casing.
+    def html_to_widgets(html: str) -> List[urwid.Widget]:
+        """Render html as one urwid.Text per content line, hashtags highlighted."""
+        widgets: List[urwid.Widget] = []
+        for line in format_content(html):
+            widgets.append(urwid.Text(highlight_hashtags(line)))
+        return widgets
+
+    def url_to_widget(url: str):
+        """Render url as plain text using the "link" attribute."""
+        link_markup = ("link", url)
+        return urwid.Text(link_markup)
--- a/toot/tui/richtext/richtext.py
+++ b/toot/tui/richtext/richtext.py
@ -0,0 +1,452 @@
+import re
+import urwid
+import unicodedata
+
+from bs4.element import NavigableString, Tag
+from toot.tui.constants import PALETTE
+from toot.utils import parse_html, urlencode_url
+from typing import List, Tuple
+from urwid.util import decompose_tagmarkup
+from urwidgets import Hyperlink, TextEmbed
+
+
+# Name (first element) of every palette entry; a class-derived attribute
+# name is only used when it appears in this list.
+STYLE_NAMES = [name for name, *_ in PALETTE]
+
+# NOTE: update this list if Mastodon starts supporting more block tags
+BLOCK_TAGS = [
+    "p",
+    "pre",
+    "li",
+    "blockquote",
+    "h1",
+    "h2",
+    "h3",
+    "h4",
+    "h5",
+    "h6",
+]
+
+
+def html_to_widgets(html, recovery_attempt=False) -> List[urwid.Widget]:
+    """Convert html to urwid widgets.
+
+    Every top-level tag is rendered to one widget and consecutive widgets
+    are separated by a blank Divider. Top-level text nodes are skipped.
+
+    If the very first top-level node is a bare string or a tag not listed
+    in BLOCK_TAGS, the HTML is out of spec (seen in content from Pixelfed
+    servers); it is then wrapped in <p></p> and converted again. This
+    happens at most once: recovery_attempt marks the retry.
+
+    Returns an empty list when nothing was rendered.
+    """
+    html = unicodedata.normalize("NFKC", html)
+    soup = parse_html(html)
+
+    widgets: List[urwid.Widget] = []
+    # recovery is only considered until the first tag has been rendered
+    may_recover = not recovery_attempt
+
+    for node in soup.body or soup:
+        is_text = isinstance(node, NavigableString)
+
+        if may_recover and (is_text or node.name not in BLOCK_TAGS):
+            return html_to_widgets(f"<p>{html}</p>", recovery_attempt=True)
+
+        if is_text:
+            continue
+
+        markup = render(node.name, node)
+        may_recover = False
+
+        if not isinstance(markup, urwid.Widget):
+            # Inline result, i.e. a single (attr, content) markup run.
+            # text_to_widget iterates over a sequence of runs, so the run
+            # must be wrapped in a list; passing the tuple itself would
+            # show the attribute name as text and drop the styling.
+            markup = urwid.Padding(
+                text_to_widget("", [markup]),
+                align="left",
+                width=("relative", 100),
+                min_width=None,
+            )
+
+        widgets.append(markup)
+        # separate top level widgets with a blank line
+        widgets.append(urwid.Divider(" "))
+
+    return widgets[:-1]  # but suppress the last blank line
+
+
+def url_to_widget(url: str):
+    """Return a TextEmbed holding one Hyperlink whose label and target are url.
+
+    The link uses the "link" attribute and is embedded with a width equal
+    to the length of the URL.
+    """
+    link = Hyperlink(url, "link", url)
+    return TextEmbed((len(url), urwid.Filler(link)))
+
+
+def inline_tag_to_text(tag) -> Tuple:
+    """Return (tag name, child markups) for an inline tag, recursively.
+
+    A tag without children yields (tag name, "").
+    """
+    children = process_inline_tag_children(tag)
+    return (tag.name, children if children else "")
+
+
+def process_inline_tag_children(tag) -> List:
+    """Return the markup for every child of tag, in document order.
+
+    Nested tags are rendered recursively via render(); anything else
+    (text nodes) is passed through unchanged.
+    """
+    return [
+        render(child.name, child) if isinstance(child, Tag) else child
+        for child in tag.children
+    ]
+
+
+# anchor text as produced by render_anchor: "<label>\x03<href>"
+URL_PATTERN = re.compile(r"(^.+)\x03(.+$)")
+
+
+def text_to_widget(attr, markup) -> urwid.Widget:
+    """Build a TextEmbed from a sequence of markup runs.
+
+    Tuple runs whose text has the form label + ETX (\\x03) + url are
+    replaced by an embedded Hyperlink showing the label; every other run
+    is passed through unchanged. The attr argument is accepted but not
+    used here.
+    """
+
+    def convert(run):
+        if not isinstance(run, tuple):
+            return run
+
+        text, attr_list = decompose_tagmarkup(run)
+        found = URL_PATTERN.match(text)
+        if found is None:
+            return run
+
+        label, url = found.groups()
+        link = Hyperlink(url, get_best_anchor_attr(attr_list), label)
+        return (len(label), urwid.Filler(link))
+
+    return TextEmbed([convert(run) for run in markup])
+
+
+def process_block_tag_children(tag) -> List[urwid.Widget]:
+    """Convert the children of a block tag to a list of widgets.
+
+    Children are rendered recursively via render(). Consecutive text
+    nodes and inline results are collected and turned into one text
+    widget; children that render to a widget are added as they are.
+
+    Document order is preserved: text collected so far is emitted before
+    each nested widget, so text found between two nested widgets stays
+    between them instead of being moved after the last widget.
+    """
+    widget_list: List[urwid.Widget] = []
+    pending_markups = []
+
+    def flush_pending():
+        # turn the text/inline runs collected so far into one text widget
+        if pending_markups:
+            widget_list.append(text_to_widget(tag.name, list(pending_markups)))
+            pending_markups.clear()
+
+    for child in tag.children:
+        # nested tags are rendered; plain text is used as markup directly
+        result = render(child.name, child) if isinstance(child, Tag) else child
+
+        if isinstance(result, urwid.Widget):
+            flush_pending()
+            widget_list.append(result)
+        else:
+            pending_markups.append(result)
+
+    flush_pending()
+    return widget_list
+
+
+def get_urwid_attr_name(tag) -> str:
+    """Translate the tag's class attribute into an urwid attribute name.
+
+    The candidate name is "class_" followed by the class names joined
+    with underscores. It is returned only if it is listed in STYLE_NAMES;
+    otherwise the tag name is returned.
+    """
+    classes = tag.attrs["class"] if "class" in tag.attrs else ()
+
+    if len(classes) > 0:
+        candidate = "class_" + "_".join(classes)
+        if candidate in STYLE_NAMES:
+            return candidate
+
+    # no class, or no palette entry for it
+    return tag.name
+
+
+def basic_block_tag_handler(tag) -> urwid.Widget:
+    """Stack the rendered children of a block tag in a Pile.
+
+    Default handler for block tags that need no special treatment.
+    """
+    children = process_block_tag_children(tag)
+    return urwid.Pile(children)
+
+
+def get_best_anchor_attr(attrib_list) -> str:
+    """Pick the attribute name used to display an anchor.
+
+    Returns "" for an empty attrib_list, the first supported class-based
+    attribute found in it, or "a" if there is none.
+    """
+    if not attrib_list:
+        return ""
+
+    # ref: https://docs.joinmastodon.org/spec/activitypub/
+    # these are the class names (translated to attrib names)
+    # that we can support for display
+    supported = ("class_hashtag", "class_mention_hashtag", "class_mention")
+
+    # flatten() yields a non-tuple argument unchanged, so for a list the
+    # first flattened element is the list itself
+    candidates = list(flatten(attrib_list))[0]
+
+    for entry in candidates:
+        try:
+            name = entry[0]
+        except KeyError:
+            # NOTE(review): presumably raised when the entry is a bs4 Tag
+            # (render_anchor may pass [tag]) - confirm
+            continue
+        if name in supported:
+            return name
+
+    return "a"
+
+
+def render(attr: str, content: str):
+    """Render content with the handler registered for the tag name attr.
+
+    Callers in this module pass the tag object itself as content. Tag
+    names without a dedicated handler fall back to inline_tag_to_text.
+    """
+    handlers = {
+        "a": render_anchor,
+        "blockquote": render_blockquote,
+        "br": render_br,
+        "em": render_em,
+        "ol": render_ol,
+        "pre": render_pre,
+        "span": render_span,
+        "b": render_strong,
+        "strong": render_strong,
+        "ul": render_ul,
+    }
+
+    # Glitch-soc and Pleroma allow <H1>...<H6> in content
+    # Mastodon (PR #23913) does not; header tags are converted to <P><STRONG></STRONG></P>
+    for block_name in ("p", "div", "li", "h1", "h2", "h3", "h4", "h5", "h6"):
+        handlers[block_name] = basic_block_tag_handler
+
+    handler = handlers.get(attr, inline_tag_to_text)
+    return handler(content)
+
+
+def render_anchor(tag) -> Tuple:
+    """Render an anchor tag as an (attribute name, text) markup tuple.
+
+    When the anchor has an href, the text is the link label followed by
+    ASCII ETX (\\x03) and the url-encoded href; text_to_widget later
+    splits on that delimiter to build a Hyperlink. An anchor without an
+    href (or with an empty one) is rendered as styled text only.
+
+    An anchor without children yields (tag name, "").
+    """
+    markups = process_inline_tag_children(tag)
+    if not markups:
+        return (tag.name, "")
+
+    # href is optional; indexing attrs directly would raise KeyError for
+    # anchors that carry none
+    href = tag.attrs.get("href")
+    title, attrib_list = decompose_tagmarkup(markups)
+    if not attrib_list:
+        attrib_list = [tag]
+    if href:
+        # urlencode the path and query portions of the URL
+        href = urlencode_url(href)
+        # use ASCII ETX (end of text) as a
+        # delimiter between the title and the HREF
+        title += f"\x03{href}"
+
+    attr = get_best_anchor_attr(attrib_list)
+
+    if attr == "a":
+        # didn't find an attribute to use in the child markup,
+        # so try the anchor tag's own attributes
+        attr = get_urwid_attr_name(tag)
+
+    # hashtag anchors have a class of "mention hashtag" or "hashtag";
+    # the style returned in that case is "class_mention_hashtag" or
+    # "class_hashtag", see the corresponding palette entries in
+    # constants.py controlling hashtag highlighting
+    return (attr, title)
+
+
+def render_blockquote(tag) -> urwid.Widget:
+    """Render a blockquote with a vertical bar down its left side.
+
+    The children are stacked in a Pile, padded by one column on each
+    side and styled with the "blockquote" attribute.
+    """
+    quoted = urwid.Pile(process_block_tag_children(tag))
+    padded = urwid.Padding(
+        quoted,
+        align="left",
+        width=("relative", 100),
+        min_width=None,
+        left=1,
+        right=1,
+    )
+
+    # only the left border is drawn; every other border part is empty
+    borders = {
+        "tlcorner": "",
+        "tline": "",
+        "lline": "│",
+        "trcorner": "",
+        "blcorner": "",
+        "rline": "",
+        "bline": "",
+        "brcorner": "",
+    }
+    boxed = urwid.LineBox(padded, **borders)
+
+    return urwid.Pile([urwid.AttrMap(boxed, "blockquote")])
+
+
+def render_br(tag) -> Tuple:
+    """Render a line break as a newline with the "br" attribute.
+
+    The tag itself is not inspected.
+    """
+    newline_markup = ("br", "\n")
+    return newline_markup
+
+
+def render_em(tag) -> Tuple:
+    """Render an em tag as italic markup.
+
+    To keep the number of palette entries small, EM is translated to
+    "i"; inside a b/strong ancestor the combined "bi" attribute is used.
+    A tag without children yields ("i", "").
+    """
+    children = process_inline_tag_children(tag)
+    if not children:
+        return ("i", "")
+
+    # special case processing for bold and italic
+    inside_bold = any(
+        ancestor.name in ("b", "strong") for ancestor in tag.parents
+    )
+    return ("bi" if inside_bold else "i", children)
+
+
+def render_ol(tag) -> urwid.Widget:
+    """Render an ordered list as a Pile with one numbered row per item.
+
+    Only direct <li> children are rendered. Numbering follows the HTML
+    attributes:
+
+    - start= on the <ol> sets the number of the first item
+    - reversed on the <ol> counts downwards; without a usable start= the
+      first item gets the item count, so the list ends at 1
+    - value= on an <li> renumbers that item and the ones following it,
+      overriding any start=
+
+    Attribute values that are not integers are ignored.
+    """
+    items = tag.find_all("li", recursive=False)
+
+    is_reversed = tag.has_attr("reversed")
+    increment = -1 if is_reversed else 1
+    # a reversed list counts down to 1 unless start= says otherwise
+    list_item_num = len(items) if is_reversed else 1
+
+    # get ol start= attribute if present
+    if tag.has_attr("start") and len(tag.attrs["start"]) > 0:
+        try:
+            list_item_num = int(tag.attrs["start"])
+        except ValueError:
+            pass
+
+    widgets = []
+    for li in items:
+        markup = render("li", li)
+
+        # li value= attribute will change the item number
+        # it also overrides any ol start= attribute
+        if li.has_attr("value") and len(li.attrs["value"]) > 0:
+            try:
+                list_item_num = int(li.attrs["value"])
+            except ValueError:
+                pass
+
+        if not isinstance(markup, urwid.Widget):
+            # 1. foo, 2. bar, etc.
+            txt = text_to_widget("li", [str(list_item_num), ". ", markup])
+            widgets.append(txt)
+        else:
+            # number in a narrow column, item widget in the wide one
+            txt = text_to_widget("li", [str(list_item_num), ". "])
+            columns = urwid.Columns(
+                [txt, ("weight", 9999, markup)], dividechars=1, min_width=3
+            )
+            widgets.append(columns)
+
+        list_item_num += increment
+
+    return urwid.Pile(widgets)
+
+
+def render_pre(tag) -> urwid.Widget:
+    """Render a pre tag as a padded block styled with the "pre" attribute.
+
+    The <PRE> tag spec says that text should not wrap, but horizontal
+    screen space is at a premium and there is no horizontal scroll bar,
+    so wrapping is allowed.
+    """
+    # a blank line first, then the rendered children
+    rows = [urwid.Divider(" "), *process_block_tag_children(tag)]
+
+    padded = urwid.Padding(
+        urwid.Pile(rows),
+        align="left",
+        width=("relative", 100),
+        min_width=None,
+        left=1,
+        right=1,
+    )
+    return urwid.Pile([urwid.AttrMap(padded, "pre")])
+
+
+def render_span(tag) -> Tuple:
+    """Render a span as (attribute name, child markups).
+
+    A span uses its own class when that maps to a palette entry;
+    otherwise it inherits the attribute name of its parent tag. A span
+    without children yields (tag name, "").
+    """
+    children = process_inline_tag_children(tag)
+    if not children:
+        return (tag.name, "")
+
+    if "class" in tag.attrs:
+        # NOTE: HTML marked invisible (generally, the http:// prefix of
+        # URLs) is still rendered. To hide it, return (tag.name, "") here
+        # when "invisible" is in tag.attrs["class"]. That could be a user
+        # preference; it's only advisable if the terminal supports OSC 8
+        # hyperlinks (and that's not automatically detectable).
+        own_style = get_urwid_attr_name(tag)
+
+        if own_style != "span":
+            # unique class name matches an entry in our palette
+            return (own_style, children)
+
+    if not tag.parent:
+        # fallback
+        return ("span", children)
+
+    return (get_urwid_attr_name(tag.parent), children)
+
+
+def render_strong(tag) -> Tuple:
+    """Render a b/strong tag as bold markup.
+
+    To keep the number of palette entries small, STRONG is translated to
+    "b"; inside an i/em ancestor the combined "bi" attribute is used.
+    A tag without children yields ("b", "").
+    """
+    children = process_inline_tag_children(tag)
+    if not children:
+        return ("b", "")
+
+    # special case processing for bold and italic
+    inside_italic = any(
+        ancestor.name in ("i", "em") for ancestor in tag.parents
+    )
+    return ("bi" if inside_italic else "b", children)
+
+
+def render_ul(tag) -> urwid.Widget:
+    """Render an unordered list as a Pile with one bulleted row per item.
+
+    Only direct <li> children are rendered.
+    """
+    bullet = "\N{bullet} "
+    rows = []
+
+    for item in tag.find_all("li", recursive=False):
+        rendered = render("li", item)
+
+        if isinstance(rendered, urwid.Widget):
+            # bullet in a narrow column, item widget in the wide one
+            marker = text_to_widget("li", [bullet])
+            row = urwid.Columns(
+                [marker, ("weight", 9999, rendered)], dividechars=1, min_width=3
+            )
+        else:
+            # * foo, * bar, etc.
+            row = text_to_widget("li", [bullet, rendered])
+
+        rows.append(row)
+
+    return urwid.Pile(rows)
+
+
+def flatten(data):
+    """Yield the leaves of arbitrarily nested tuples, depth first.
+
+    Only tuples are descended into; any other value (lists included) is
+    yielded as is.
+    """
+    if not isinstance(data, tuple):
+        yield data
+        return
+
+    for item in data:
+        yield from flatten(item)
--- a/toot/tui/timeline.py
+++ b/toot/tui/timeline.py
@ -5,14 +5,14 @@ import webbrowser
 from typing import List, Optional

 from toot.tui import app
-from toot.utils import format_content
+from toot.tui.richtext import html_to_widgets, url_to_widget
 from toot.utils.datetime import parse_datetime, time_ago
 from toot.utils.language import language_name

-from .entities import Status
-from .scroll import Scrollable, ScrollBar
-from .utils import highlight_hashtags, highlight_keys
-from .widgets import SelectableText, SelectableColumns
+from toot.entities import Status
+from toot.tui.scroll import Scrollable, ScrollBar
+from toot.tui.utils import highlight_keys
+from toot.tui.widgets import SelectableText, SelectableColumns

 logger = logging.getLogger("toot")

@ -310,7 +310,6 @@ class Timeline(urwid.Columns):
 class StatusDetails(urwid.Pile):
    def __init__(self, timeline: Timeline, status: Optional[Status]):
        self.status = status
-        self.followed_tags = timeline.tui.followed_tags
        self.followed_accounts = timeline.tui.followed_accounts

        reblogged_by = status.author if status and status.reblog else None
@ -340,8 +339,10 @@ class StatusDetails(urwid.Pile):
            yield ("pack", urwid.Text(("content_warning", "Marked as sensitive. Press S to view.")))
        else:
            content = status.original.translation if status.original.show_translation else status.data["content"]
-            for line in format_content(content):
-                yield ("pack", urwid.Text(highlight_hashtags(line, self.followed_tags)))
+            widgetlist = html_to_widgets(content)
+
+            for line in widgetlist:
+                yield (line)

            media = status.data["media_attachments"]
            if media:
@ -350,7 +351,7 @@ class StatusDetails(urwid.Pile):
                    yield ("pack", urwid.Text([("bold", "Media attachment"), " (", m["type"], ")"]))
                    if m["description"]:
                        yield ("pack", urwid.Text(m["description"]))
-                    yield ("pack", urwid.Text(("link", m["url"])))
+                    yield ("pack", url_to_widget(m["url"]))

            poll = status.original.data.get("poll")
            if poll:
@ -410,7 +411,7 @@ class StatusDetails(urwid.Pile):
        if card["description"]:
            yield urwid.Text(card["description"].strip())
            yield urwid.Text("")
-        yield urwid.Text(("link", card["url"]))
+        yield url_to_widget(card["url"])

    def poll_generator(self, poll):
        for idx, option in enumerate(poll["options"]):
--- a/toot/tui/utils.py
+++ b/toot/tui/utils.py
@ -35,15 +35,12 @@ def highlight_keys(text, high_attr, low_attr=""):
    return list(_gen())


-def highlight_hashtags(line, followed_tags, attr="hashtag", followed_attr="hashtag_followed"):
+def highlight_hashtags(line):
    hline = []

    for p in re.split(HASHTAG_PATTERN, line):
        if p.startswith("#"):
-            if p[1:].lower() in (t.lower() for t in followed_tags):
-                hline.append((followed_attr, p))
-            else:
-                hline.append((attr, p))
+            hline.append(("hashtag", p))
        else:
            hline.append(p)

--- a/toot/utils/__init__.py
+++ b/toot/utils/__init__.py
@ -10,6 +10,7 @@ from bs4 import BeautifulSoup
 from typing import Dict

 from toot.exceptions import ConsoleError
+from urllib.parse import parse_qsl, quote, unquote, urlencode, urlparse


 def str_bool(b):
@ -22,20 +23,22 @@ def str_bool_nullable(b):
    return None if b is None else str_bool(b)


-def get_text(html):
-    """Converts html to text, strips all tags."""
-
+def parse_html(html: str) -> BeautifulSoup:
    # Ignore warnings made by BeautifulSoup, if passed something that looks like
    # a file (e.g. a dot which matches current dict), it will warn that the file
    # should be opened instead of passing a filename.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
-        text = BeautifulSoup(html.replace('&apos;', "'"), "html.parser").get_text()
-
-    return unicodedata.normalize('NFKC', text)
+        return BeautifulSoup(html.replace("&apos;", "'"), "html.parser")


-def parse_html(html):
+def get_text(html):
+    """Converts html to text, strips all tags."""
+    text = parse_html(html).get_text()
+    return unicodedata.normalize("NFKC", text)
+
+
+def html_to_paragraphs(html):
    """Attempt to convert html to plain text while keeping line breaks.
    Returns a list of paragraphs, each being a list of lines.
    """
@ -54,7 +57,7 @@ def format_content(content):
    Returns a generator yielding lines of content.
    """

-    paragraphs = parse_html(content)
+    paragraphs = html_to_paragraphs(content)

    first = True

@ -186,3 +189,14 @@ def _warn_scheme_deprecated():
        "instead write:",
        "  toot instance http://unsafehost.com\n"
    ]))
+
+
+def urlencode_url(url):
+    """Return url with its path and query string percent-encoded.
+
+    Components that are already encoded are decoded first, so encoding
+    an encoded URL does not double-encode it. Scheme, host, ;params and
+    fragment are left untouched.
+
+    The query is rebuilt from its parsed key/value pairs, so "+" in the
+    query is read as a space and a key without a value is written back
+    as "key=".
+    """
+    parsed_url = urlparse(url)
+
+    # unencode before encoding, to prevent double-urlencoding
+    encoded_path = quote(unquote(parsed_url.path), safe="-._~()'!*:@,;+&=/")
+
+    # parse_qsl decodes the pairs of the query component; urlencode then
+    # encodes each key and value exactly once via quote
+    query_pairs = parse_qsl(parsed_url.query, keep_blank_values=True)
+    encoded_query = urlencode(query_pairs, safe="-._~()'!*:@,;?/", quote_via=quote)
+
+    return parsed_url._replace(path=encoded_path, query=encoded_query).geturl()