Add strikethrough/underline <-> unicode converter to formatter

This commit is contained in:
Tulir Asokan
2018-03-07 14:03:38 +02:00
parent 13dddb4c10
commit a6f26c16fc
3 changed files with 46 additions and 3 deletions
+11 -2
View File
@@ -25,7 +25,8 @@ from telethon_aio.tl.types import *
from .. import user as u, puppet as pu, portal as po from .. import user as u, puppet as pu, portal as po
from ..db import Message as DBMessage from ..db import Message as DBMessage
from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html, trim_reply_fallback_text) from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html,
trim_reply_fallback_text, html_to_unicode)
log = logging.getLogger("mau.fmt.mx") log = logging.getLogger("mau.fmt.mx")
@@ -35,7 +36,7 @@ class MatrixParser(HTMLParser):
room_regex = re.compile("https://matrix.to/#/(#.+:.+)") room_regex = re.compile("https://matrix.to/#/(#.+:.+)")
block_tags = ("br", "p", "pre", "blockquote", block_tags = ("br", "p", "pre", "blockquote",
"ol", "ul", "li", "ol", "ul", "li",
"h1", "h2", "h3", "h4", "h5", "h6" "h1", "h2", "h3", "h4", "h5", "h6",
"div", "hr", "table") "div", "hr", "table")
def __init__(self): def __init__(self):
@@ -159,6 +160,14 @@ class MatrixParser(HTMLParser):
text = url text = url
elif previous_tag == "command": elif previous_tag == "command":
text = f"/{text}" text = f"/{text}"
# Strikethrough
if "del" in self._open_tags:
text = html_to_unicode(text, "\u0336")
# Underline
if "u" in self._open_tags:
text = html_to_unicode(text, "\u0332")
list_entry_handled_once = False list_entry_handled_once = False
# In order to maintain order of things like blockquotes in lists or lists in blockquotes, # In order to maintain order of things like blockquotes in lists or lists in blockquotes,
# we can't just have ifs/elses and we need to actually loop through the open tags in order. # we can't just have ifs/elses and we need to actually loop through the open tags in order.
+4 -1
View File
@@ -23,7 +23,7 @@ from mautrix_appservice import MatrixRequestError
from .. import user as u, puppet as pu, portal as po from .. import user as u, puppet as pu, portal as po
from ..db import Message as DBMessage from ..db import Message as DBMessage
from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html, from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html,
trim_reply_fallback_text) trim_reply_fallback_text, unicode_to_html)
log = logging.getLogger("mau.fmt.tg") log = logging.getLogger("mau.fmt.tg")
@@ -138,6 +138,9 @@ async def telegram_to_matrix(evt, source, main_intent=None, is_edit=False):
text += f"\n- {evt.post_author}" text += f"\n- {evt.post_author}"
html += f"<br/><i>- <u>{evt.post_author}</u></i>" html += f"<br/><i>- <u>{evt.post_author}</u></i>"
html = unicode_to_html(text, html, "\u0336", "del")
html = unicode_to_html(text, html, "\u0332", "u")
if html: if html:
html = html.replace("\n", "<br/>") html = html.replace("\n", "<br/>")
+31
View File
@@ -1,3 +1,4 @@
from html import escape
import struct import struct
import re import re
@@ -31,3 +32,33 @@ HTML_REPLY_FALLBACK_REGEX = re.compile(r"^<blockquote data-mx-reply>[\s\S]+?</bl
def trim_reply_fallback_html(html): def trim_reply_fallback_html(html):
return HTML_REPLY_FALLBACK_REGEX.sub("", html) return HTML_REPLY_FALLBACK_REGEX.sub("", html)
# Matches an HTML character reference (e.g. "&amp;", "&#39;", "&#x27;") at the very end of a string.
_TRAILING_ENTITY_REGEX = re.compile(r"&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);\Z")


def unicode_to_html(text, html, ctrl, tag):
    """Convert characters marked with a combining character into an HTML tag.

    Every character in ``html`` that is directly followed by ``ctrl`` is
    considered marked. The ``ctrl`` characters are removed and each run of
    consecutive marked characters is wrapped in ``<tag>...</tag>``.

    :param text: The plain text of the message. Only used to check whether
        ``ctrl`` occurs at all and as the source for ``html`` when no HTML
        was given.
    :param html: The HTML form of the message, or a falsy value if there is
        none yet (in which case the escaped ``text`` is used).
    :param ctrl: The combining character that marks the preceding character,
        e.g. ``"\\u0336"`` for strikethrough or ``"\\u0332"`` for underline.
    :param tag: The name of the HTML tag to wrap marked runs in.
    :return: ``html`` unchanged if ``text`` does not contain ``ctrl``,
        otherwise the converted HTML.

    NOTE: ``ctrl`` must directly follow its base character (or the character
    reference that represents it) in ``html``; markup placed between the two
    is not handled.
    """
    # Only look for the control character we were asked to convert. Checking
    # for any other control character would make us open a tag in HTML that
    # has nothing to wrap.
    if ctrl not in text:
        return html
    if not html:
        html = escape(text)

    tag_start = f"<{tag}>"
    tag_end = f"</{tag}>"

    # Every piece except the last one was followed by a control character, so
    # the final character of each of those pieces is a marked character.
    *marked_pieces, tail = html.split(ctrl)

    parts = []
    in_tag = False
    for piece in marked_pieces:
        if not piece:
            # Two control characters in a row: the second one has no base
            # character of its own, so it is simply dropped.
            continue

        # An escaped character such as "&amp;" is one marked character, so
        # the tag must go around the whole reference rather than inside it.
        entity = _TRAILING_ENTITY_REGEX.search(piece)
        marked_from = entity.start() if entity else len(piece) - 1
        plain, marked = piece[:marked_from], piece[marked_from:]

        if plain and in_tag:
            # Unmarked characters interrupt the current run.
            parts.append(tag_end)
            in_tag = False
        parts.append(plain)
        if not in_tag:
            parts.append(tag_start)
            in_tag = True
        parts.append(marked)

    if in_tag:
        # Whatever follows the last control character is unmarked, so the run
        # always ends here, even if nothing follows at all.
        parts.append(tag_end)
    parts.append(tail)
    return "".join(parts)
def html_to_unicode(text, ctrl):
    """Mark every character of ``text`` with a combining character.

    :param text: The text to convert.
    :param ctrl: The combining character to put after each character, e.g.
        ``"\\u0336"`` for strikethrough or ``"\\u0332"`` for underline.
    :return: ``text`` with ``ctrl`` appended after each of its characters.
        Empty input yields an empty string, so that no combining character is
        emitted without a base character to attach to.
    """
    return "".join(f"{char}{ctrl}" for char in text)