Remove the ContentParser class, use functions instead

It did not help, just added to the indent.
2023-11-06 18:14:21 +01:00 · 2023-11-06 18:14:21 +01:00 · 073dd3025c
commit 073dd3025c
parent a544453338
4 changed files with 394 additions and 391 deletions
--- a/toot/tui/overlays.py
+++ b/toot/tui/overlays.py
@@ -7,7 +7,7 @@ from toot import __version__
 from toot import api
 from toot.tui.utils import highlight_keys
 from toot.tui.widgets import Button, EditBox, SelectableText
-from toot.tui.richtext import ContentParser
+from toot.tui.richtext import html_to_widgets


 class StatusSource(urwid.Padding):
@@ -255,8 +255,6 @@ class Account(urwid.ListBox):
        super().__init__(walker)

    def generate_contents(self, account, relationship=None, last_action=None):
-        parser = ContentParser()
-
        if self.last_action and not self.last_action.startswith("Confirm"):
            yield Button(f"Confirm {self.last_action}", on_press=take_action, user_data=self)
            yield Button("Cancel", on_press=cancel_action, user_data=self)
@@ -282,7 +280,7 @@ class Account(urwid.ListBox):
        if account["note"]:
            yield urwid.Divider()

-            widgetlist = parser.html_to_widgets(account["note"])
+            widgetlist = html_to_widgets(account["note"])
            for line in widgetlist:
                yield (line)

@@ -317,7 +315,7 @@ class Account(urwid.ListBox):
                yield urwid.Divider()
                yield urwid.Text([("bold", f"{name.rstrip(':')}"), ":"])

-                widgetlist = parser.html_to_widgets(field["value"])
+                widgetlist = html_to_widgets(field["value"])
                for line in widgetlist:
                    yield (line)

--- a/toot/tui/poll.py
+++ b/toot/tui/poll.py
@@ -4,7 +4,7 @@ from toot import api
 from toot.exceptions import ApiError
 from toot.utils.datetime import parse_datetime
 from .widgets import Button, CheckBox, RadioButton
-from .richtext import ContentParser
+from .richtext import html_to_widgets


 class Poll(urwid.ListBox):
@@ -86,8 +86,7 @@ class Poll(urwid.ListBox):
    def generate_contents(self, status):
        yield urwid.Divider()

-        parser = ContentParser()
-        widgetlist = parser.html_to_widgets(status.data["content"])
+        widgetlist = html_to_widgets(status.data["content"])

        for line in widgetlist:
            yield (line)
--- a/toot/tui/richtext.py
+++ b/toot/tui/richtext.py
@@ -16,10 +16,7 @@ STYLE_NAMES = [p[0] for p in PALETTE]
 BLOCK_TAGS = ["p", "pre", "li", "blockquote", "h1", "h2", "h3", "h4", "h5", "h6"]


-class ContentParser:
-    """Parse a limited subset of HTML and create urwid widgets."""
-
-    def html_to_widgets(self, html, recovery_attempt=False) -> List[urwid.Widget]:
+def html_to_widgets(html, recovery_attempt=False) -> List[urwid.Widget]:
    """Convert html to urwid widgets"""
    widgets: List[urwid.Widget] = []
    html = unicodedata.normalize("NFKC", html)
@@ -33,7 +30,7 @@ class ContentParser:
                # the HTML is out of spec, doesn't start with a tag,
                # we see this in content from Pixelfed servers.
                # attempt a fix by wrapping the HTML with <p></p>
-                    return self.html_to_widgets(f"<p>{html}</p>", recovery_attempt=True)
+                return html_to_widgets(f"<p>{html}</p>", recovery_attempt=True)
            else:
                continue
        else:
@@ -42,14 +39,14 @@ class ContentParser:
            # the HTML is out of spec. Attempt a fix by wrapping the
            # HTML with <p></p>
            if (first_tag and not recovery_attempt and name not in BLOCK_TAGS):
-                    return self.html_to_widgets(f"<p>{html}</p>", recovery_attempt=True)
+                return html_to_widgets(f"<p>{html}</p>", recovery_attempt=True)

-                markup = self.render(name, e)
+            markup = render(name, e)
            first_tag = False

        if not isinstance(markup, urwid.Widget):
            # plaintext, so create a padded text widget
-                txt = self.text_to_widget("", markup)
+            txt = text_to_widget("", markup)
            markup = urwid.Padding(
                txt,
                align="left",
@@ -61,26 +58,29 @@ class ContentParser:
        widgets.append(urwid.Divider(" "))
    return widgets[:-1]  # but suppress the last blank line

-    def inline_tag_to_text(self, tag) -> Tuple:
+
+def inline_tag_to_text(tag) -> Tuple:
    """Convert html tag to plain text with tag as attributes recursively"""
-        markups = self.process_inline_tag_children(tag)
+    markups = process_inline_tag_children(tag)
    if not markups:
        return (tag.name, "")
    return (tag.name, markups)

-    def process_inline_tag_children(self, tag) -> List:
+
+def process_inline_tag_children(tag) -> List:
    """Recursively retrieve all children
    and convert to a list of markup text"""
    markups = []
    for child in tag.children:
        if isinstance(child, Tag):
-                markup = self.render(child.name, child)
+            markup = render(child.name, child)
            markups.append(markup)
        else:
            markups.append(child)
    return markups

-    def text_to_widget(self, attr, markup) -> urwid.Widget:
+
+def text_to_widget(attr, markup) -> urwid.Widget:
    if not has_urwidgets:
        return urwid.Text((attr, markup))

@@ -99,7 +99,7 @@ class ContentParser:
            # find anchor titles with an ETX separator followed by href
            m = re.match(r"(^.+)\x03(.+$)", txt)
            if m:
-                    anchor_attr = self.get_best_anchor_attr(attr_list)
+                anchor_attr = get_best_anchor_attr(attr_list)
                markup_list.append(
                    parse_text(
                        txt,
@@ -114,7 +114,8 @@ class ContentParser:

    return TextEmbed(markup_list)

-    def process_block_tag_children(self, tag) -> List[urwid.Widget]:
+
+def process_block_tag_children(tag) -> List[urwid.Widget]:
    """Recursively retrieve all children
    and convert to a list of widgets
    any inline tags containing text will be
@@ -129,7 +130,7 @@ class ContentParser:
        if isinstance(child, Tag):
            # child is a nested tag; process using custom method
            # or default to inline_tag_to_text
-                result = self.render(child.name, child)
+            result = render(child.name, child)
            if isinstance(result, urwid.Widget):
                found_nested_widget = True
                child_widgets.append(result)
@@ -147,17 +148,18 @@ class ContentParser:

    widget_list = []
    if len(pre_widget_markups):
-            widget_list.append(self.text_to_widget(tag.name, pre_widget_markups))
+        widget_list.append(text_to_widget(tag.name, pre_widget_markups))

    if len(child_widgets):
        widget_list += child_widgets

    if len(post_widget_markups):
-            widget_list.append(self.text_to_widget(tag.name, post_widget_markups))
+        widget_list.append(text_to_widget(tag.name, post_widget_markups))

    return widget_list

-    def get_urwid_attr_name(self, tag) -> str:
+
+def get_urwid_attr_name(tag) -> str:
    """Get the class name and translate to a
    name suitable for use as an urwid
    text attribute name"""
@@ -174,17 +176,13 @@ class ContentParser:
    # fallback to returning the tag name
    return tag.name

-    # Tag handlers start here.
-    # Tags not explicitly listed are "supported" by
-    # rendering as text.
-    # Inline tags return a list of marked up text for urwid.Text
-    # Block tags return urwid.Widget

-    def basic_block_tag_handler(self, tag) -> urwid.Widget:
+def basic_block_tag_handler(tag) -> urwid.Widget:
    """default for block tags that need no special treatment"""
-        return urwid.Pile(self.process_block_tag_children(tag))
+    return urwid.Pile(process_block_tag_children(tag))

-    def get_best_anchor_attr(self, attrib_list) -> str:
+
+def get_best_anchor_attr(attrib_list) -> str:
    if not attrib_list:
        return ""
    flat_al = list(flatten(attrib_list))
@@ -202,46 +200,48 @@ class ContentParser:

    return "a"

-    def render(self, attr: str, content: str):
+
+def render(attr: str, content: str):
    if attr in ["a"]:
-            return self.render_anchor(content)
+        return render_anchor(content)

    if attr in ["blockquote"]:
-            return self.render_blockquote(content)
+        return render_blockquote(content)

    if attr in ["br"]:
-            return self.render_br(content)
+        return render_br(content)

    if attr in ["em"]:
-            return self.render_em(content)
+        return render_em(content)

    if attr in ["ol"]:
-            return self.render_ol(content)
+        return render_ol(content)

    if attr in ["pre"]:
-            return self.render_pre(content)
+        return render_pre(content)

    if attr in ["span"]:
-            return self.render_span(content)
+        return render_span(content)

    if attr in ["b", "strong"]:
-            return self.render_strong(content)
+        return render_strong(content)

    if attr in ["ul"]:
-            return self.render_ul(content)
+        return render_ul(content)

    # Glitch-soc and Pleroma allow <H1>...<H6> in content
    # Mastodon (PR #23913) does not; header tags are converted to <P><STRONG></STRONG></P>
    if attr in ["p", "div", "li", "h1", "h2", "h3", "h4", "h5", "h6"]:
-            return self.basic_block_tag_handler(content)
+        return basic_block_tag_handler(content)

    # Fall back to inline_tag_to_text handler
-        return self.inline_tag_to_text(content)
+    return inline_tag_to_text(content)

-    def render_anchor(self, tag) -> Tuple:
+
+def render_anchor(tag) -> Tuple:
    """anchor tag handler"""

-        markups = self.process_inline_tag_children(tag)
+    markups = process_inline_tag_children(tag)
    if not markups:
        return (tag.name, "")

@@ -257,14 +257,14 @@ class ContentParser:
        # delimiter between the title and the HREF
        title += f"\x03{href}"

-        attr = self.get_best_anchor_attr(attrib_list)
+    attr = get_best_anchor_attr(attrib_list)

    if attr == "a":
        # didn't find an attribute to use
        # in the child markup, so let's
        # try the anchor tag's own attributes

-            attr = self.get_urwid_attr_name(tag)
+        attr = get_urwid_attr_name(tag)

    # hashtag anchors have a class of "mention hashtag"
    # or "hashtag"
@@ -275,8 +275,9 @@ class ContentParser:

    return (attr, title)

-    def render_blockquote(self, tag) -> urwid.Widget:
-        widget_list = self.process_block_tag_children(tag)
+
+def render_blockquote(tag) -> urwid.Widget:
+    widget_list = process_block_tag_children(tag)
    blockquote_widget = urwid.LineBox(
        urwid.Padding(
            urwid.Pile(widget_list),
@@ -297,13 +298,15 @@ class ContentParser:
    )
    return urwid.Pile([urwid.AttrMap(blockquote_widget, "blockquote")])

-    def render_br(self, tag) -> Tuple:
+
+def render_br(tag) -> Tuple:
    return ("br", "\n")

-    def render_em(self, tag) -> Tuple:
+
+def render_em(tag) -> Tuple:
    # to simplify the number of palette entries
    # translate EM to I (italic)
-        markups = self.process_inline_tag_children(tag)
+    markups = process_inline_tag_children(tag)
    if not markups:
        return ("i", "")

@@ -314,7 +317,8 @@ class ContentParser:

    return ("i", markups)

-    def render_ol(self, tag) -> urwid.Widget:
+
+def render_ol(tag) -> urwid.Widget:
    """ordered list tag handler"""

    widgets = []
@@ -329,7 +333,7 @@ class ContentParser:
            pass

    for li in tag.find_all("li", recursive=False):
-            markup = self.render("li", li)
+        markup = render("li", li)

        # li value= attribute will change the item number
        # it also overrides any ol start= attribute
@@ -341,11 +345,11 @@ class ContentParser:
                pass

        if not isinstance(markup, urwid.Widget):
-                txt = self.text_to_widget("li", [str(list_item_num), ". ", markup])
+            txt = text_to_widget("li", [str(list_item_num), ". ", markup])
            # 1. foo, 2. bar, etc.
            widgets.append(txt)
        else:
-                txt = self.text_to_widget("li", [str(list_item_num), ". "])
+            txt = text_to_widget("li", [str(list_item_num), ". "])
            columns = urwid.Columns(
                [txt, ("weight", 9999, markup)], dividechars=1, min_width=3
            )
@@ -355,14 +359,15 @@ class ContentParser:

    return urwid.Pile(widgets)

-    def render_pre(self, tag) -> urwid.Widget:
+
+def render_pre(tag) -> urwid.Widget:
    # <PRE> tag spec says that text should not wrap,
    # but horizontal screen space is at a premium
    # and we have no horizontal scroll bar, so allow
    # wrapping.

    widget_list = [urwid.Divider(" ")]
-        widget_list += self.process_block_tag_children(tag)
+    widget_list += process_block_tag_children(tag)

    pre_widget = urwid.Padding(
        urwid.Pile(widget_list),
@@ -374,8 +379,9 @@ class ContentParser:
    )
    return urwid.Pile([urwid.AttrMap(pre_widget, "pre")])

-    def render_span(self, tag) -> Tuple:
-        markups = self.process_inline_tag_children(tag)
+
+def render_span(tag) -> Tuple:
+    markups = process_inline_tag_children(tag)

    if not markups:
        return (tag.name, "")
@@ -394,22 +400,23 @@ class ContentParser:
        # if "invisible" in tag.attrs["class"]:
        #     return (tag.name, "")

-            style_name = self.get_urwid_attr_name(tag)
+        style_name = get_urwid_attr_name(tag)

        if style_name != "span":
            # unique class name matches an entry in our palette
            return (style_name, markups)

    if tag.parent:
-            return (self.get_urwid_attr_name(tag.parent), markups)
+        return (get_urwid_attr_name(tag.parent), markups)
    else:
        # fallback
        return ("span", markups)

-    def render_strong(self, tag) -> Tuple:
+
+def render_strong(tag) -> Tuple:
    # to simplify the number of palette entries
    # translate STRONG to B (bold)
-        markups = self.process_inline_tag_children(tag)
+    markups = process_inline_tag_children(tag)
    if not markups:
        return ("b", "")

@@ -420,20 +427,21 @@ class ContentParser:

    return ("b", markups)

-    def render_ul(self, tag) -> urwid.Widget:
+
+def render_ul(tag) -> urwid.Widget:
    """unordered list tag handler"""

    widgets = []

    for li in tag.find_all("li", recursive=False):
-            markup = self.render("li", li)
+        markup = render("li", li)

        if not isinstance(markup, urwid.Widget):
-                txt = self.text_to_widget("li", ["\N{bullet} ", markup])
+            txt = text_to_widget("li", ["\N{bullet} ", markup])
            # * foo, * bar, etc.
            widgets.append(txt)
        else:
-                txt = self.text_to_widget("li", ["\N{bullet} "])
+            txt = text_to_widget("li", ["\N{bullet} "])
            columns = urwid.Columns(
                [txt, ("weight", 9999, markup)], dividechars=1, min_width=3
            )
--- a/toot/tui/timeline.py
+++ b/toot/tui/timeline.py
@@ -6,6 +6,7 @@ import webbrowser
 from typing import List, Optional

 from toot.tui import app
+from toot.tui.richtext import html_to_widgets
 from toot.utils.datetime import parse_datetime, time_ago
 from toot.utils.language import language_name

@@ -13,7 +14,6 @@ from toot.entities import Status
 from toot.tui.scroll import Scrollable, ScrollBar
 from toot.tui.utils import highlight_keys
 from toot.tui.widgets import SelectableText, SelectableColumns
-from toot.tui.richtext import ContentParser
 from toot.utils import urlencode_url
 from toot.tui.stubs.urwidgets import Hyperlink, TextEmbed, parse_text, has_urwidgets

@@ -356,9 +356,7 @@ class StatusDetails(urwid.Pile):
            yield ("pack", urwid.Text(("content_warning", "Marked as sensitive. Press S to view.")))
        else:
            content = status.original.translation if status.original.show_translation else status.data["content"]
-
-            parser = ContentParser()
-            widgetlist = parser.html_to_widgets(content)
+            widgetlist = html_to_widgets(content)

            for line in widgetlist:
                yield (line)