Extract block tags

2023-11-06 09:56:12 +01:00 · 2023-11-06 09:56:12 +01:00 · 2aba3f93f9
commit 2aba3f93f9
parent a8b4c79716
1 changed files with 5 additions and 17 deletions
--- a/toot/tui/richtext.py
+++ b/toot/tui/richtext.py
@@ -12,6 +12,9 @@ from urwid.util import decompose_tagmarkup

 STYLE_NAMES = [p[0] for p in PALETTE]

+# NOTE: update this list if Mastodon starts supporting more block tags
+BLOCK_TAGS = ["p", "pre", "li", "blockquote", "h1", "h2", "h3", "h4", "h5", "h6"]
+

 class ContentParser:
    """Parse a limited subset of HTML and create urwid widgets."""
@@ -21,6 +24,7 @@ class ContentParser:
        widgets: List[urwid.Widget] = []
        html = unicodedata.normalize("NFKC", html)
        soup = parse_html(html)
+
        first_tag = True
        for e in soup.body or soup:
            if isinstance(e, NavigableString):
@@ -37,23 +41,7 @@ class ContentParser:
                # if our HTML starts with a tag, but not a block tag
                # the HTML is out of spec. Attempt a fix by wrapping the
                # HTML with <p></p>
-                if (
-                    first_tag
-                    and not recovery_attempt
-                    and name
-                    not in (
-                        "p",
-                        "pre",
-                        "li",
-                        "blockquote",
-                        "h1",
-                        "h2",
-                        "h3",
-                        "h4",
-                        "h5",
-                        "h6",
-                    )  # NOTE: update this list if Mastodon starts supporting more block tags
-                ):
+                if (first_tag and not recovery_attempt and name not in BLOCK_TAGS):
                    return self.html_to_widgets(f"<p>{html}</p>", recovery_attempt=True)

                # First, look for a custom tag handler method in this class