Update some markup
This commit is contained in:
parent
e72be437e6
commit
28dd345219
1 changed file with 47 additions and 1 deletion
|
@@ -5,6 +5,7 @@ import subprocess
|
|||
import tempfile
|
||||
import unicodedata
|
||||
import warnings
|
||||
from itertools import chain
|
||||
from typing import Dict
|
||||
from urllib.parse import quote, unquote, urlencode, urlparse
|
||||
|
||||
|
@@ -41,7 +42,7 @@ def get_text(element, links, images):
|
|||
text = element.text
|
||||
if (element['href'] != element.text
|
||||
and 'mention' not in element.get('class', [])
|
||||
and 'tag' not in element.get('rel')):
|
||||
and 'tag' not in element.get('rel', [])):
|
||||
links.append(element['href'])
|
||||
text = f'{text}[{len(links)}]'
|
||||
return f'<cyan>{text}</cyan>'
|
||||
|
@@ -50,6 +51,51 @@ def get_text(element, links, images):
|
|||
images.append(element['src'])
|
||||
text = f'{text}[{len(images)}]'
|
||||
return f'<cyan>{text}</cyan>'
|
||||
if element.name in ('i', 'em'):
|
||||
return f'<italic>{element.text}</italic>'
|
||||
if element.name in ('b', 'strong'):
|
||||
return f'<bold>{element.text}</bold>'
|
||||
if element.name in ('s', 'del'):
|
||||
return f'<strikethrough>{element.text}</strikethrough>'
|
||||
if element.name == 'u':
|
||||
return f'<underline>{element.text}</underline>'
|
||||
for a in element.find_all('a'):
|
||||
soup = BeautifulSoup("", "html.parser")
|
||||
if (element['href'] != element.text
|
||||
and 'mention' not in element.get('class', [])
|
||||
and 'tag' not in element.get('rel', [])):
|
||||
links.append(a.href)
|
||||
new_tag = soup.new_tag('a')
|
||||
new_tag.string = '<cyan>' + a.text + '</cyan>'
|
||||
a.replace_with(new_tag)
|
||||
for img in element.find_all('img'):
|
||||
soup = BeautifulSoup("", "html.parser")
|
||||
text = element.get('alt', 'image')
|
||||
images.append(element['src'])
|
||||
text = f'{text}[{len(images)}]'
|
||||
new_tag = soup.new_tag('span')
|
||||
new_tag.string = '<cyan>' + text + '</cyan>'
|
||||
img.replace_with(new_tag)
|
||||
for italic in chain(element.find_all('i'), element.find_all('em')):
|
||||
soup = BeautifulSoup("", "html.parser")
|
||||
new_tag = soup.new_tag('span')
|
||||
new_tag.string = '<italic>' + a.text + '</italic>'
|
||||
italic.replace_with(new_tag)
|
||||
for bold in chain(element.find_all('b'), element.find_all('strong')):
|
||||
soup = BeautifulSoup("", "html.parser")
|
||||
new_tag = soup.new_tag('span')
|
||||
new_tag.string = '<bold>' + a.text + '</bold>'
|
||||
bold.replace_with(new_tag)
|
||||
for underline in element.find_all('u'):
|
||||
soup = BeautifulSoup("", "html.parser")
|
||||
new_tag = soup.new_tag('span')
|
||||
new_tag.string = '<underline>' + a.text + '</underline>'
|
||||
underline.replace_with(new_tag)
|
||||
for strike in chain(element.find_all('s'), element.find_all('del')):
|
||||
soup = BeautifulSoup("", "html.parser")
|
||||
new_tag = soup.new_tag('span')
|
||||
new_tag.string = '<strikethrough>' + a.text + '</strikethrough>'
|
||||
strike.replace_with(new_tag)
|
||||
text = element.get_text()
|
||||
text = text.replace('<', '\\<')
|
||||
text = text.replace('<', '<')
|
||||
|
|
Loading…
Reference in a new issue