Add strikethrough/underline <-> unicode converter to formatter

2018-03-07 14:03:38 +02:00
parent 13dddb4c10
commit a6f26c16fc
3 changed files with 46 additions and 3 deletions
@@ -25,7 +25,8 @@ from telethon_aio.tl.types import *
 from .. import user as u, puppet as pu, portal as po
 from ..db import Message as DBMessage
-from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html, trim_reply_fallback_text)
+from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html,
                   trim_reply_fallback_text, html_to_unicode)
 log = logging.getLogger("mau.fmt.mx")
@@ -35,7 +36,7 @@ class MatrixParser(HTMLParser):
    room_regex = re.compile("https://matrix.to/#/(#.+:.+)")
    block_tags = ("br", "p", "pre", "blockquote",
                  "ol", "ul", "li",
-                  "h1", "h2", "h3", "h4", "h5", "h6"
+                  "h1", "h2", "h3", "h4", "h5", "h6",
                  "div", "hr", "table")
    def __init__(self):
@@ -159,6 +160,14 @@ class MatrixParser(HTMLParser):
                text = url
        elif previous_tag == "command":
            text = f"/{text}"
        # Strikethrough
        if "del" in self._open_tags:
            text = html_to_unicode(text, "\u0336")
        # Underline
        if "u" in self._open_tags:
            text = html_to_unicode(text, "\u0332")
        list_entry_handled_once = False
        # In order to maintain order of things like blockquotes in lists or lists in blockquotes,
        # we can't just have ifs/elses and we need to actually loop through the open tags in order.
@@ -23,7 +23,7 @@ from mautrix_appservice import MatrixRequestError
 from .. import user as u, puppet as pu, portal as po
 from ..db import Message as DBMessage
 from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html,
-                   trim_reply_fallback_text)
+                   trim_reply_fallback_text, unicode_to_html)
 log = logging.getLogger("mau.fmt.tg")
@@ -138,6 +138,9 @@ async def telegram_to_matrix(evt, source, main_intent=None, is_edit=False):
        text += f"\n- {evt.post_author}"
        html += f"<br/><i>- <u>{evt.post_author}</u></i>"
    html = unicode_to_html(text, html, "\u0336", "del")
    html = unicode_to_html(text, html, "\u0332", "u")
    if html:
        html = html.replace("\n", "<br/>")
@@ -1,3 +1,4 @@
 from html import escape
 import struct
 import re
@@ -31,3 +32,33 @@ HTML_REPLY_FALLBACK_REGEX = re.compile(r"^<blockquote data-mx-reply>[\s\S]+?</bl
 def trim_reply_fallback_html(html):
    """Return ``html`` with every match of ``HTML_REPLY_FALLBACK_REGEX`` removed.

    The pattern is the module-level compiled regex defined just above this
    function; matches are replaced with the empty string.
    """
    return re.sub(HTML_REPLY_FALLBACK_REGEX, "", html)
 def unicode_to_html(text, html, ctrl, tag):
    """Turn characters marked with a combining character into an HTML tag.

    Every character in ``html`` that is directly followed by ``ctrl`` is
    wrapped in ``<tag>...</tag>`` and the ``ctrl`` characters themselves are
    dropped. Consecutive marked characters share a single tag pair.

    :param text: Plain-text form of the message; used to decide whether there
                 is anything to convert and as the source when ``html`` is
                 empty.
    :param html: Existing HTML form of the message, or a falsy value if there
                 is none yet (the escaped ``text`` is used in that case).
    :param ctrl: The combining character that marks a formatted character,
                 e.g. ``"\u0336"`` or ``"\u0332"``.
    :param tag: Name of the HTML tag to emit, e.g. ``"del"`` or ``"u"``.
    :return: The converted HTML, or ``html`` unchanged when ``text`` does not
             contain ``ctrl``.
    """
    # Only the marker this call handles is relevant. Checking for other
    # markers here would send unrelated text through the conversion below.
    if ctrl not in text:
        return html
    if not html:
        html = escape(text)
    tag_start = f"<{tag}>"
    tag_end = f"</{tag}>"
    # escape() turns e.g. "<" into "&lt;"; such an entity is one visible
    # character and must be wrapped as a whole, not just its trailing ";".
    trailing_entity = re.compile(
        r"&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);\Z")

    # Every piece except the last one was followed by ``ctrl`` in the input,
    # so the final visible character of each of those pieces is the marked one.
    *marked_pieces, tail = html.split(ctrl)
    parts = []
    tag_open = False
    for piece in marked_pieces:
        if not piece:
            # Leading or doubled marker: there is no character to wrap.
            continue
        entity = trailing_entity.search(piece)
        cut = entity.start() if entity else len(piece) - 1
        plain, marked = piece[:cut], piece[cut:]
        if plain:
            # Unmarked content interrupts the current run of marked characters.
            if tag_open:
                parts.append(tag_end)
                tag_open = False
            parts.append(plain)
        if not tag_open:
            parts.append(tag_start)
            tag_open = True
        parts.append(marked)
    # The remainder after the last marker is never marked; close any open run
    # before it so the tag cannot be left dangling.
    if tag_open:
        parts.append(tag_end)
    parts.append(tail)
    return "".join(parts)
 def html_to_unicode(text, ctrl):
    """Mark every character of ``text`` with the combining character ``ctrl``.

    :param text: The string whose characters should be marked.
    :param ctrl: Combining character to place after each character, e.g.
                 ``"\u0336"`` or ``"\u0332"``.
    :return: ``text`` with ``ctrl`` inserted after every character; an empty
             ``text`` is returned as an empty string.
    """
    if not text:
        # A combining character needs a base character to attach to, so do
        # not emit a lone marker for empty input.
        return ""
    # join() puts ctrl between characters; the trailing ctrl marks the last one.
    return ctrl.join(text) + ctrl