Add strikethrough/underline <-> unicode converter to formatter
This commit is contained in:
@@ -25,7 +25,8 @@ from telethon_aio.tl.types import *
|
|||||||
|
|
||||||
from .. import user as u, puppet as pu, portal as po
|
from .. import user as u, puppet as pu, portal as po
|
||||||
from ..db import Message as DBMessage
|
from ..db import Message as DBMessage
|
||||||
from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html, trim_reply_fallback_text)
|
from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html,
|
||||||
|
trim_reply_fallback_text, html_to_unicode)
|
||||||
|
|
||||||
log = logging.getLogger("mau.fmt.mx")
|
log = logging.getLogger("mau.fmt.mx")
|
||||||
|
|
||||||
@@ -35,7 +36,7 @@ class MatrixParser(HTMLParser):
|
|||||||
room_regex = re.compile("https://matrix.to/#/(#.+:.+)")
|
room_regex = re.compile("https://matrix.to/#/(#.+:.+)")
|
||||||
block_tags = ("br", "p", "pre", "blockquote",
|
block_tags = ("br", "p", "pre", "blockquote",
|
||||||
"ol", "ul", "li",
|
"ol", "ul", "li",
|
||||||
"h1", "h2", "h3", "h4", "h5", "h6"
|
"h1", "h2", "h3", "h4", "h5", "h6",
|
||||||
"div", "hr", "table")
|
"div", "hr", "table")
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@@ -159,6 +160,14 @@ class MatrixParser(HTMLParser):
|
|||||||
text = url
|
text = url
|
||||||
elif previous_tag == "command":
|
elif previous_tag == "command":
|
||||||
text = f"/{text}"
|
text = f"/{text}"
|
||||||
|
|
||||||
|
# Strikethrough
|
||||||
|
if "del" in self._open_tags:
|
||||||
|
text = html_to_unicode(text, "\u0336")
|
||||||
|
# Underline
|
||||||
|
if "u" in self._open_tags:
|
||||||
|
text = html_to_unicode(text, "\u0332")
|
||||||
|
|
||||||
list_entry_handled_once = False
|
list_entry_handled_once = False
|
||||||
# In order to maintain order of things like blockquotes in lists or lists in blockquotes,
|
# In order to maintain order of things like blockquotes in lists or lists in blockquotes,
|
||||||
# we can't just have ifs/elses and we need to actually loop through the open tags in order.
|
# we can't just have ifs/elses and we need to actually loop through the open tags in order.
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ from mautrix_appservice import MatrixRequestError
|
|||||||
from .. import user as u, puppet as pu, portal as po
|
from .. import user as u, puppet as pu, portal as po
|
||||||
from ..db import Message as DBMessage
|
from ..db import Message as DBMessage
|
||||||
from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html,
|
from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html,
|
||||||
trim_reply_fallback_text)
|
trim_reply_fallback_text, unicode_to_html)
|
||||||
|
|
||||||
log = logging.getLogger("mau.fmt.tg")
|
log = logging.getLogger("mau.fmt.tg")
|
||||||
|
|
||||||
@@ -138,6 +138,9 @@ async def telegram_to_matrix(evt, source, main_intent=None, is_edit=False):
|
|||||||
text += f"\n- {evt.post_author}"
|
text += f"\n- {evt.post_author}"
|
||||||
html += f"<br/><i>- <u>{evt.post_author}</u></i>"
|
html += f"<br/><i>- <u>{evt.post_author}</u></i>"
|
||||||
|
|
||||||
|
html = unicode_to_html(text, html, "\u0336", "del")
|
||||||
|
html = unicode_to_html(text, html, "\u0332", "u")
|
||||||
|
|
||||||
if html:
|
if html:
|
||||||
html = html.replace("\n", "<br/>")
|
html = html.replace("\n", "<br/>")
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
from html import escape
|
||||||
import struct
|
import struct
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@@ -31,3 +32,33 @@ HTML_REPLY_FALLBACK_REGEX = re.compile(r"^<blockquote data-mx-reply>[\s\S]+?</bl
|
|||||||
|
|
||||||
def trim_reply_fallback_html(html):
|
def trim_reply_fallback_html(html):
|
||||||
return HTML_REPLY_FALLBACK_REGEX.sub("", html)
|
return HTML_REPLY_FALLBACK_REGEX.sub("", html)
|
||||||
|
|
||||||
|
|
||||||
|
# An HTML character reference (``&amp;``, ``&#39;``, ``&#x27;``) sitting at the very end of a
# chunk. ``\Z`` rather than ``$`` so that a trailing newline is not silently skipped over.
_TRAILING_ENTITY_REGEX = re.compile(r"&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);\Z")


def unicode_to_html(text, html, ctrl, tag):
    """Convert characters marked with a combining character into an HTML tag.

    Every character that is directly followed by ``ctrl`` is treated as marked.
    Consecutive marked characters are wrapped in a single ``<tag>...</tag>``
    pair and all occurrences of ``ctrl`` are dropped from the result.

    :param text: The plain-text form of the message, used to decide whether
                 any conversion is needed and as the fallback source of HTML.
    :param html: The HTML form of the message. If it is empty, the escaped
                 ``text`` is used instead.
    :param ctrl: The combining character that marks a formatted character,
                 e.g. ``"\\u0336"`` (strikethrough) or ``"\\u0332"`` (underline).
    :param tag:  The HTML tag name to wrap marked runs in, e.g. ``"del"``.
    :return: ``html`` unchanged when ``text`` does not contain ``ctrl``,
             otherwise the HTML with marked runs wrapped in ``tag``.

    Markers stacked together with other combining characters on the same base
    character are not handled specially.
    """
    # Only look at the marker we were asked to convert: checking for other
    # markers here would make the loop below insert tags for a marker that
    # never occurs in the input.
    if not text or ctrl not in text:
        return html
    if not html:
        html = escape(text)

    tag_start = f"<{tag}>"
    tag_end = f"</{tag}>"

    # After splitting on the marker, the last character of every chunk except
    # the final one is the character the marker was attached to. The final
    # chunk is trailing content that is never marked.
    *marked_chunks, tail = html.split(ctrl)

    output = []
    in_tag = False
    for chunk in marked_chunks:
        # An escaped character such as "&amp;" is one logical character, so
        # keep the whole entity inside the tag instead of cutting it in half.
        entity = _TRAILING_ENTITY_REGEX.search(chunk)
        cut = entity.start() if entity else max(len(chunk) - 1, 0)
        plain, marked = chunk[:cut], chunk[cut:]

        # Unmarked content between two marked characters ends the current run.
        if plain and in_tag:
            output.append(tag_end)
            in_tag = False
        output.append(plain)

        # An empty chunk (leading or doubled marker) has nothing to wrap.
        if marked:
            if not in_tag:
                output.append(tag_start)
                in_tag = True
            output.append(marked)

    # Close a run that reaches the end of the marked content.
    if in_tag:
        output.append(tag_end)
    output.append(tail)
    return "".join(output)
|
||||||
|
|
||||||
|
|
||||||
|
def html_to_unicode(text, ctrl):
    """Mark every character of ``text`` by appending ``ctrl`` after it.

    :param text: The text whose characters should be marked.
    :param ctrl: The combining character to append after each character,
                 e.g. ``"\\u0336"`` (strikethrough) or ``"\\u0332"`` (underline).
    :return: ``text`` with ``ctrl`` following each character. Empty input
             yields an empty string, so no marker is emitted without a
             character to attach to.
    """
    return "".join(f"{char}{ctrl}" for char in text)
|
||||||
|
|||||||
Reference in New Issue
Block a user