Merge branch 'native-strike-underline'

This commit is contained in:
Tulir Asokan
2019-07-06 20:50:07 +03:00
4 changed files with 44 additions and 73 deletions
@@ -18,15 +18,15 @@ from typing import List, Tuple, Pattern
import re import re
from telethon.tl.types import (MessageEntityMention as Mention, MessageEntityBotCommand as Command, from telethon.tl.types import (MessageEntityMention as Mention, MessageEntityBotCommand as Command,
MessageEntityMentionName as MentionName, MessageEntityEmail as Email, MessageEntityMentionName as MentionName, MessageEntityUrl as URL,
MessageEntityUrl as URL, MessageEntityTextUrl as TextURL, MessageEntityEmail as Email, MessageEntityTextUrl as TextURL,
MessageEntityBold as Bold, MessageEntityItalic as Italic, MessageEntityBold as Bold, MessageEntityItalic as Italic,
MessageEntityCode as Code, MessageEntityPre as Pre, MessageEntityCode as Code, MessageEntityPre as Pre,
TypeMessageEntity) MessageEntityStrike as Strike, MessageEntityUnderline as Underline,
MessageEntityBlockquote as Blockquote, TypeMessageEntity)
from ... import user as u, puppet as pu, portal as po from ... import user as u, puppet as pu, portal as po
from ...types import MatrixUserID from ...types import MatrixUserID
from ..util import html_to_unicode
from .telegram_message import TelegramMessage, Entity, offset_length_multiply from .telegram_message import TelegramMessage, Entity, offset_length_multiply
from .html_reader import HTMLNode, read_html from .html_reader import HTMLNode, read_html
@@ -101,13 +101,6 @@ class MatrixParser:
children.append(child) children.append(child)
return TelegramMessage.join(children, "\n") return TelegramMessage.join(children, "\n")
@classmethod
def blockquote_to_tmessage(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage:
    """Render a ``<blockquote>`` node as text with every line prefixed by ``"> "``.

    The node is parsed with ``tag_aware_parse_node``, the result is trimmed
    and split on newlines, each resulting line gets the ``"> "`` prefix, and
    the lines are joined back together with newlines.

    :param node: The blockquote HTML node to convert.
    :param ctx: The recursion context passed through to the node parser.
    :return: The joined, quote-prefixed message.
    """
    quoted_lines = [
        line.prepend("> ")
        for line in cls.tag_aware_parse_node(node, ctx).trim().split("\n")
    ]
    return TelegramMessage.join(quoted_lines, "\n")
@classmethod @classmethod
def header_to_tmessage(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage: def header_to_tmessage(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage:
children = cls.node_to_tmessages(node, ctx) children = cls.node_to_tmessages(node, ctx)
@@ -122,15 +115,14 @@ class MatrixParser:
msg.format(Bold) msg.format(Bold)
elif node.tag in ("i", "em"): elif node.tag in ("i", "em"):
msg.format(Italic) msg.format(Italic)
elif node.tag in ("s", "strike", "del"):
msg.format(Strike)
elif node.tag in ("u", "ins"):
msg.format(Underline)
elif node == "blockquote":
msg.format(Blockquote)
elif node.tag == "command": elif node.tag == "command":
msg.format(Command) msg.format(Command)
elif node.tag in ("s", "strike", "del"):
msg.text = html_to_unicode(msg.text, "\u0336")
elif node.tag in ("u", "ins"):
msg.text = html_to_unicode(msg.text, "\u0332")
if node.tag in ("s", "strike", "del", "u", "ins"):
msg.entities = Entity.adjust(msg.entities, offset_length_multiply(2))
return msg return msg
@@ -171,9 +163,7 @@ class MatrixParser:
@classmethod @classmethod
def node_to_tmessage(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage: def node_to_tmessage(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage:
if node.tag == "blockquote": if node.tag == "ol":
return cls.blockquote_to_tmessage(node, ctx)
elif node.tag == "ol":
return cls.list_to_tmessage(node, ctx) return cls.list_to_tmessage(node, ctx)
elif node.tag == "ul": elif node.tag == "ul":
return cls.list_to_tmessage(node, ctx.enter_list()) return cls.list_to_tmessage(node, ctx.enter_list())
@@ -181,7 +171,8 @@ class MatrixParser:
return cls.header_to_tmessage(node, ctx) return cls.header_to_tmessage(node, ctx)
elif node.tag == "br": elif node.tag == "br":
return TelegramMessage("\n") return TelegramMessage("\n")
elif node.tag in ("b", "strong", "i", "em", "s", "del", "u", "ins", "command"): elif node.tag in ("b", "strong", "i", "em", "s", "del", "u", "ins", "blockquote",
"command"):
return cls.basic_format_to_tmessage(node, ctx) return cls.basic_format_to_tmessage(node, ctx)
elif node.tag == "a": elif node.tag == "a":
return cls.link_to_tstring(node, ctx) return cls.link_to_tstring(node, ctx)
+29 -17
View File
@@ -24,7 +24,8 @@ from telethon.tl.types import (MessageEntityMention, MessageEntityMentionName, M
MessageEntityItalic, MessageEntityCode, MessageEntityPre, MessageEntityItalic, MessageEntityCode, MessageEntityPre,
MessageEntityBotCommand, MessageEntityHashtag, MessageEntityCashtag, MessageEntityBotCommand, MessageEntityHashtag, MessageEntityCashtag,
MessageEntityPhone, TypeMessageEntity, Message, PeerChannel, MessageEntityPhone, TypeMessageEntity, Message, PeerChannel,
MessageFwdHeader, PeerUser) MessageEntityBlockquote, MessageEntityStrike, MessageFwdHeader,
MessageEntityUnderline, PeerUser)
from mautrix_appservice import MatrixRequestError from mautrix_appservice import MatrixRequestError
from mautrix_appservice.intent_api import IntentAPI from mautrix_appservice.intent_api import IntentAPI
@@ -33,7 +34,7 @@ from .. import user as u, puppet as pu, portal as po
from ..types import TelegramID from ..types import TelegramID
from ..db import Message as DBMessage from ..db import Message as DBMessage
from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html, from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html,
trim_reply_fallback_text, unicode_to_html) trim_reply_fallback_text)
if TYPE_CHECKING: if TYPE_CHECKING:
from ..abstract_user import AbstractUser from ..abstract_user import AbstractUser
@@ -194,9 +195,6 @@ async def telegram_to_matrix(evt: Message, source: "AbstractUser",
text += f"\n- {evt.post_author}" text += f"\n- {evt.post_author}"
html += f"<br/><i>- <u>{evt.post_author}</u></i>" html += f"<br/><i>- <u>{evt.post_author}</u></i>"
html = unicode_to_html(text, html, "\u0336", "del")
html = unicode_to_html(text, html, "\u0332", "u")
if html: if html:
html = html.replace("\n", "<br/>") html = html.replace("\n", "<br/>")
@@ -214,29 +212,43 @@ def _telegram_entities_to_matrix_catch(text: str, entities: List[TypeMessageEnti
return "[failed conversion in _telegram_entities_to_matrix]" return "[failed conversion in _telegram_entities_to_matrix]"
def _telegram_entities_to_matrix(text: str, entities: List[TypeMessageEntity]) -> str: def _telegram_entities_to_matrix(text: str, entities: List[TypeMessageEntity],
offset: int = 0, length: int = None) -> str:
if not entities: if not entities:
return text return escape(text)
if length is None:
length = len(text)
html = [] html = []
last_offset = 0 last_offset = 0
for entity in entities: for i, entity in enumerate(entities):
if entity.offset > last_offset: if entity.offset > offset + length:
html.append(escape(text[last_offset:entity.offset])) break
elif entity.offset < last_offset: relative_offset = entity.offset - offset
if relative_offset > last_offset:
html.append(escape(text[last_offset:relative_offset]))
elif relative_offset < last_offset:
continue continue
skip_entity = False skip_entity = False
entity_text = escape(text[entity.offset:entity.offset + entity.length]) entity_text = _telegram_entities_to_matrix(
text=text[relative_offset:relative_offset + entity.length],
entities=entities[i + 1:], offset=entity.offset, length=entity.length)
entity_type = type(entity) entity_type = type(entity)
if entity_type == MessageEntityBold: if entity_type == MessageEntityBold:
html.append(f"<strong>{entity_text}</strong>") html.append(f"<strong>{entity_text}</strong>")
elif entity_type == MessageEntityItalic: elif entity_type == MessageEntityItalic:
html.append(f"<em>{entity_text}</em>") html.append(f"<em>{entity_text}</em>")
elif entity_type == MessageEntityUnderline:
html.append(f"<u>{entity_text}</u>")
elif entity_type == MessageEntityStrike:
html.append(f"<del>{entity_text}</del>")
elif entity_type == MessageEntityBlockquote:
html.append(f"<blockquote>{entity_text}</blockquote>")
elif entity_type == MessageEntityCode: elif entity_type == MessageEntityCode:
html.append(("<pre><code>{entity_text}</code></pre>" html.append(f"<pre><code>{entity_text}</code></pre>"
if "\n" in entity_text if "\n" in entity_text
else "<code>{entity_text}</code>").format(entity_text=entity_text)) else f"<code>{entity_text}</code>")
elif entity_type == MessageEntityPre: elif entity_type == MessageEntityPre:
skip_entity = _parse_pre(html, entity_text, entity.language) skip_entity = _parse_pre(html, entity_text, entity.language)
elif entity_type == MessageEntityMention: elif entity_type == MessageEntityMention:
@@ -254,8 +266,8 @@ def _telegram_entities_to_matrix(text: str, entities: List[TypeMessageEntity]) -
html.append(f"<font color='blue'>{entity_text}</font>") html.append(f"<font color='blue'>{entity_text}</font>")
else: else:
skip_entity = True skip_entity = True
last_offset = entity.offset + (0 if skip_entity else entity.length) last_offset = relative_offset + (0 if skip_entity else entity.length)
html.append(text[last_offset:]) html.append(escape(text[last_offset:]))
return "".join(html) return "".join(html)
-32
View File
@@ -20,38 +20,6 @@ import struct
import re import re
def unicode_to_html(text: str, html: str, ctrl: str, tag: str) -> str:
    """Replace ``ctrl``-marked characters in ``html`` with ``<tag>`` elements.

    Every character that is immediately followed by ``ctrl`` is treated as
    marked. Runs of consecutive marked characters are wrapped in a single
    ``<tag>...</tag>`` pair and the ``ctrl`` characters themselves are removed.

    :param text: The plain text; only used to check whether ``ctrl`` occurs at
                 all and as the source for ``html`` when ``html`` is empty.
    :param html: The HTML to transform. When empty, ``escape(text)`` is used.
    :param ctrl: The marker that follows each formatted character
                 (e.g. ``"\\u0336"`` for strikethrough).
    :param tag: The HTML tag name to wrap marked runs in (e.g. ``"del"``).
    :return: ``html`` unchanged if ``text`` has no ``ctrl``; otherwise the
             HTML with the markers converted to tags.
    """
    if ctrl not in text:
        return html
    if not html:
        html = escape(text)
    tag_start = f"<{tag}>"
    tag_end = f"</{tag}>"

    # Every segment except the last one was followed by a marker, so its final
    # character is a marked one. The last segment is plain trailing content.
    *marked_segments, tail = html.split(ctrl)

    parts = []
    in_tag = False
    for segment in marked_segments:
        if not segment:
            # A marker with nothing in front of it (leading or doubled marker)
            # has no character to format; just drop it.
            continue
        # escape() may have turned the marked character into an entity such as
        # "&lt;"; keep the entity in one piece instead of cutting off its ";".
        entity = re.search(r"&#?\w+;$", segment)
        cut = entity.start() if entity else len(segment) - 1
        plain, marked_char = segment[:cut], segment[cut:]
        if plain:
            # Unmarked content interrupts the current run of marked characters.
            if in_tag:
                parts.append(tag_end)
                in_tag = False
            parts.append(plain)
        if not in_tag:
            parts.append(tag_start)
            in_tag = True
        parts.append(marked_char)
    if in_tag:
        parts.append(tag_end)
    parts.append(tail)
    return "".join(parts)
def html_to_unicode(text: str, ctrl: str) -> str:
    """Return ``text`` with ``ctrl`` appended after every character.

    Empty input yields an empty string, so that a combining marker is never
    emitted without a character to attach to.

    :param text: The text whose characters should be marked.
    :param ctrl: The marker to insert after each character
                 (e.g. ``"\\u0336"`` for strikethrough).
    :return: The marked text, ``len(text)`` characters plus one ``ctrl`` each.
    """
    return "".join(char + ctrl for char in text)
# add_surrogates and remove_surrogates are unicode surrogate utility functions from Telethon. # add_surrogates and remove_surrogates are unicode surrogate utility functions from Telethon.
# Licensed under the MIT license. # Licensed under the MIT license.
# https://github.com/LonamiWebs/Telethon/blob/7cce7aa3e4c6c7019a55530391b1761d33e5a04e/telethon/helpers.py # https://github.com/LonamiWebs/Telethon/blob/7cce7aa3e4c6c7019a55530391b1761d33e5a04e/telethon/helpers.py
+2 -2
View File
@@ -6,7 +6,7 @@ extras = {
"fast_crypto": ["cryptg>=0.1,<0.3"], "fast_crypto": ["cryptg>=0.1,<0.3"],
"webp_convert": ["Pillow>=4.3.0,<7"], "webp_convert": ["Pillow>=4.3.0,<7"],
"hq_thumbnails": ["moviepy>=1.0,<2.0"], "hq_thumbnails": ["moviepy>=1.0,<2.0"],
"metrics": ["prometheus-client>=0.6.0,<0.7.0"], "metrics": ["prometheus-client>=0.6.0,<0.8.0"],
} }
extras["all"] = list({dep for deps in extras.values() for dep in deps}) extras["all"] = list({dep for deps in extras.values() for dep in deps})
@@ -38,7 +38,7 @@ setuptools.setup(
"ruamel.yaml>=0.15.35,<0.16", "ruamel.yaml>=0.15.35,<0.16",
"future-fstrings>=0.4.2", "future-fstrings>=0.4.2",
"python-magic>=0.4.15,<0.5", "python-magic>=0.4.15,<0.5",
"telethon>=1.7,<1.9", "telethon>=1.9,<1.10",
"telethon-session-sqlalchemy>=0.2.14,<0.3", "telethon-session-sqlalchemy>=0.2.14,<0.3",
], ],
extras_require=extras, extras_require=extras,