Migrate formatter and utils to mautrix-python

This commit is contained in:
Tulir Asokan
2019-08-04 15:20:14 +03:00
parent 05f906427e
commit 32d686e908
11 changed files with 147 additions and 533 deletions
@@ -19,23 +19,24 @@ import logging
from telethon.tl.types import (MessageEntityMention, MessageEntityMentionName, MessageEntityItalic, from telethon.tl.types import (MessageEntityMention, MessageEntityMentionName, MessageEntityItalic,
TypeMessageEntity) TypeMessageEntity)
from telethon.helpers import add_surrogate, del_surrogate
from mautrix.types import RoomID
from ... import puppet as pu from ... import puppet as pu
from ...types import TelegramID, MatrixRoomID from ...types import TelegramID
from ...db import Message as DBMessage from ...db import Message as DBMessage
from ..util import (add_surrogates, remove_surrogates, trim_reply_fallback_html,
trim_reply_fallback_text)
from .parser import ParsedMessage, parse_html from .parser import ParsedMessage, parse_html
if TYPE_CHECKING: if TYPE_CHECKING:
from ...context import Context from ...context import Context
log = logging.getLogger("mau.fmt.mx") # type: logging.Logger log: logging.Logger = logging.getLogger("mau.fmt.mx")
should_bridge_plaintext_highlights = False # type: bool should_bridge_plaintext_highlights: bool = False
command_regex = re.compile(r"^!([A-Za-z0-9@]+)") # type: Pattern command_regex: Pattern = re.compile(r"^!([A-Za-z0-9@]+)")
not_command_regex = re.compile(r"^\\(![A-Za-z0-9@]+)") # type: Pattern not_command_regex: Pattern = re.compile(r"^\\(![A-Za-z0-9@]+)")
plain_mention_regex = None # type: Optional[Pattern] plain_mention_regex: Optional[Pattern] = None
def plain_mention_to_html(match: Match) -> str: def plain_mention_to_html(match: Match) -> str:
@@ -75,8 +76,8 @@ def matrix_to_telegram(html: str) -> ParsedMessage:
if should_bridge_plaintext_highlights: if should_bridge_plaintext_highlights:
html = plain_mention_regex.sub(plain_mention_to_html, html) html = plain_mention_regex.sub(plain_mention_to_html, html)
text, entities = parse_html(add_surrogates(html)) text, entities = parse_html(add_surrogate(html))
text = remove_surrogates(text.strip()) text = del_surrogate(text.strip())
text, entities = cut_long_message(text, entities) text, entities = cut_long_message(text, entities)
return text, entities return text, entities
@@ -85,7 +86,7 @@ def matrix_to_telegram(html: str) -> ParsedMessage:
def matrix_reply_to_telegram(content: Dict[str, Any], tg_space: TelegramID, def matrix_reply_to_telegram(content: Dict[str, Any], tg_space: TelegramID,
room_id: Optional[MatrixRoomID] = None) -> Optional[TelegramID]: room_id: Optional[RoomID] = None) -> Optional[TelegramID]:
relates_to = content.get("m.relates_to", None) or {} relates_to = content.get("m.relates_to", None) or {}
if not relates_to: if not relates_to:
return None return None
@@ -1,65 +0,0 @@
# mautrix-telegram - A Matrix-Telegram puppeting bridge
# Copyright (C) 2019 Tulir Asokan
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from typing import Dict, List, Tuple
from html.parser import HTMLParser
class HTMLNode(list):
    """An HTML element that doubles as the list of its child elements.

    Besides the children stored in the list itself, a node carries its tag
    name, its attributes as a dict, and two string buffers, ``text`` and
    ``tail``, which both start out empty.
    """

    def __init__(self, tag: str, attrs: List[Tuple[str, str]]):
        list.__init__(self)
        self.tag: str = tag
        self.attrib: Dict[str, str] = dict(attrs)
        self.text: str = ""
        self.tail: str = ""
class NodeifyingParser(HTMLParser):
    """HTMLParser subclass that assembles the parsed markup into an HTMLNode tree.

    ``stack`` holds the chain of currently open elements. Its first entry is a
    synthetic ``html`` node created in ``__init__``, which collects every
    top-level element.
    """

    # From https://www.w3.org/TR/html5/syntax.html#writing-html-documents-elements
    void_tags = ("area", "base", "br", "col", "command", "embed", "hr", "img", "input", "link",
                 "meta", "param", "source", "track", "wbr")

    def __init__(self):
        super().__init__()
        root = HTMLNode("html", [])
        self.stack: List[HTMLNode] = [root]

    def handle_starttag(self, tag, attrs):
        """Attach a new element to the innermost open one; open it unless it is a void tag."""
        element = HTMLNode(tag, attrs)
        self.stack[-1].append(element)
        if tag in self.void_tags:
            return
        self.stack.append(element)

    def handle_startendtag(self, tag, attrs):
        """Attach a self-closing element without pushing it onto the stack."""
        element = HTMLNode(tag, attrs)
        self.stack[-1].append(element)

    def handle_endtag(self, tag):
        """Close the innermost open element, but only when the end tag matches it."""
        innermost = self.stack[-1]
        if tag == innermost.tag:
            self.stack.pop()

    def handle_data(self, data):
        """Store character data on the innermost open element.

        Data seen before the element has any child goes into its ``text``;
        otherwise it is appended to the ``tail`` of its most recent child.
        """
        innermost = self.stack[-1]
        if len(innermost) == 0:
            innermost.text += data
        else:
            innermost[-1].tail += data

    def error(self, message):
        """Ignore parser errors."""
        pass
def read_html(data: str) -> HTMLNode:
    """Parse ``data`` as HTML and return the synthetic ``html`` root node.

    :param data: The HTML markup to parse.
    :return: The first entry of the parser's element stack, i.e. the root node
             under which all parsed top-level elements were collected.
    """
    parser = NodeifyingParser()
    parser.feed(data)
    # feed() only processes complete constructs and buffers anything that might
    # still be continued (for example trailing text that ends in an unterminated
    # "&" reference). close() forces that buffered remainder to be processed so
    # it is not silently dropped from the tree.
    parser.close()
    return parser.stack[0]
@@ -1,11 +0,0 @@
from typing import Dict, List
# Type stub for HTMLNode: declared as a list of child HTMLNode objects that
# additionally exposes the tag name, the text/tail string buffers and the
# attribute dict.
class HTMLNode(List['HTMLNode']):
tag: str
text: str
tail: str
attrib: Dict[str, str]
# Type stub: takes the HTML markup as a string and returns an HTMLNode.
def read_html(data: str) -> HTMLNode: ...
+50 -213
View File
@@ -13,240 +13,77 @@
# #
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>. # along with this program. If not, see <https://www.gnu.org/licenses/>.
from typing import List, Tuple, Pattern from typing import List, Tuple, Optional
import re
from telethon.tl.types import (MessageEntityMention as Mention, MessageEntityBotCommand as Command, from telethon.tl.types import TypeMessageEntity
MessageEntityMentionName as MentionName, MessageEntityUrl as URL,
MessageEntityEmail as Email, MessageEntityTextUrl as TextURL, from mautrix.types import UserID, RoomID
MessageEntityBold as Bold, MessageEntityItalic as Italic, from mautrix.util.formatter import MatrixParser as BaseMatrixParser, RecursionContext
MessageEntityCode as Code, MessageEntityPre as Pre, from mautrix.util.formatter.html_reader_htmlparser import read_html, HTMLNode
MessageEntityStrike as Strike, MessageEntityUnderline as Underline,
MessageEntityBlockquote as Blockquote, TypeMessageEntity)
from ... import user as u, puppet as pu, portal as po from ... import user as u, puppet as pu, portal as po
from ...types import MatrixUserID from .telegram_message import TelegramMessage, TelegramEntityType
from .telegram_message import TelegramMessage, Entity, offset_length_multiply
from .html_reader import HTMLNode, read_html
ParsedMessage = Tuple[str, List[TypeMessageEntity]] ParsedMessage = Tuple[str, List[TypeMessageEntity]]
def parse_html(input_html: str) -> ParsedMessage: def parse_html(input_html: str) -> ParsedMessage:
return MatrixParser.parse(input_html) msg = MatrixParser.parse(input_html)
return msg.text, msg.telegram_entities
class RecursionContext: class MatrixParser(BaseMatrixParser[TelegramMessage]):
def __init__(self, strip_linebreaks: bool = True, ul_depth: int = 0): e = TelegramEntityType
self.strip_linebreaks = strip_linebreaks # type: bool fs = TelegramMessage
self.ul_depth = ul_depth # type: int read_html = read_html
self._inited = True # type: bool
def __setattr__(self, key, value):
if getattr(self, "_inited", False) is True:
raise TypeError("'RecursionContext' object is immutable")
super(RecursionContext, self).__setattr__(key, value)
def enter_list(self) -> 'RecursionContext':
return RecursionContext(strip_linebreaks=self.strip_linebreaks, ul_depth=self.ul_depth + 1)
def enter_code_block(self) -> 'RecursionContext':
return RecursionContext(strip_linebreaks=False, ul_depth=self.ul_depth)
class MatrixParser:
mention_regex = re.compile("https://matrix.to/#/(@.+:.+)") # type: Pattern
room_regex = re.compile("https://matrix.to/#/(#.+:.+)") # type: Pattern
block_tags = ("p", "pre", "blockquote",
"ol", "ul", "li",
"h1", "h2", "h3", "h4", "h5", "h6",
"div", "hr", "table") # type: Tuple[str, ...]
list_bullets = ("", "", "", "") # type: Tuple[str, ...]
@classmethod @classmethod
def list_bullet(cls, depth: int) -> str: def custom_node_to_fstring(cls, node: HTMLNode, ctx: RecursionContext
return cls.list_bullets[(depth - 1) % len(cls.list_bullets)] + " " ) -> Optional[TelegramMessage]:
@classmethod
def list_to_tmessage(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage:
# Render an <ol>/<ul> node as plain text with one entry per <li> child,
# joined by newlines.
# NOTE(review): indentation is lost in this view; the nesting described in the
# comments below is inferred from the statements themselves - confirm against
# the real file before relying on it.
ordered = node.tag == "ol"
tagged_children = cls.node_to_tagged_tmessages(node, ctx)
counter = 1
indent_length = 0
if ordered:
# Ordered lists start counting at the "start" attribute; a non-numeric value
# falls back to 1.
try:
counter = int(node.attrib.get("start", "1"))
except ValueError:
counter = 1
# Width of the largest index that could be printed; feeds the continuation
# indent computed below.
longest_index = counter - 1 + len(tagged_children)
indent_length = len(str(longest_index))
indent = (indent_length + 4) * " "
children = [] # type: List[TelegramMessage]
for child, tag in tagged_children:
# Anything that is not a list item (including bare text) is skipped.
if tag != "li":
continue
if ordered:
prefix = f"{counter}. "
counter += 1
else:
prefix = cls.list_bullet(ctx.ul_depth)
child = child.prepend(prefix)
# Every line of a multi-line item except the first is prefixed with the indent.
parts = child.split("\n")
parts = parts[:1] + [part.prepend(indent) for part in parts[1:]]
child = TelegramMessage.join(parts, "\n")
children.append(child)
return TelegramMessage.join(children, "\n")
@classmethod
def header_to_tmessage(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage:
    """Render a heading node as bold text prefixed with ``#`` characters.

    The number of ``#`` characters is the digit found at index 1 of the tag
    name, followed by a single space.
    """
    parts = cls.node_to_tmessages(node, ctx)
    level = int(node.tag[1])
    marker = "#" * level + " "
    heading = TelegramMessage.join(parts, "")
    return heading.prepend(marker).format(Bold)
@classmethod
def basic_format_to_tmessage(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage:
msg = cls.tag_aware_parse_node(node, ctx) msg = cls.tag_aware_parse_node(node, ctx)
if node.tag in ("b", "strong"): if node.tag == "command":
msg.format(Bold) msg.format(TelegramEntityType.COMMAND)
elif node.tag in ("i", "em"): return None
msg.format(Italic)
elif node.tag in ("s", "strike", "del"):
msg.format(Strike)
elif node.tag in ("u", "ins"):
msg.format(Underline)
elif node == "blockquote":
msg.format(Blockquote)
elif node.tag == "command":
msg.format(Command)
@classmethod
def user_pill_to_fstring(cls, msg: TelegramMessage, user_id: UserID) -> TelegramMessage:
user = (pu.Puppet.get_by_mxid(user_id)
or u.User.get_by_mxid(user_id, create=False))
if not user:
return msg
if user.username:
return TelegramMessage(f"@{user.username}").format(TelegramEntityType.MENTION)
elif user.tgid:
displayname = user.plain_displayname or msg.text
return TelegramMessage(displayname).format(TelegramEntityType.MENTION_NAME,
user_id=user.tgid)
return msg return msg
@classmethod @classmethod
def link_to_tstring(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage: def url_to_fstring(cls, msg: TelegramMessage, url: str) -> TelegramMessage:
msg = cls.tag_aware_parse_node(node, ctx) if url == msg.text:
href = node.attrib.get("href", "") return msg.format(cls.e.URL)
if not href: else:
return msg return msg.format(cls.e.INLINE_URL, url=url)
if href.startswith("mailto:"):
return TelegramMessage(href[len("mailto:"):]).format(Email)
mention = cls.mention_regex.match(href)
if mention:
mxid = MatrixUserID(mention.group(1))
user = (pu.Puppet.get_by_mxid(mxid)
or u.User.get_by_mxid(mxid, create=False))
if not user:
return msg
if user.username:
return TelegramMessage(f"@{user.username}").format(Mention)
elif user.tgid:
displayname = user.plain_displayname or msg.text
return TelegramMessage(displayname).format(MentionName, user_id=user.tgid)
return msg
room = cls.room_regex.match(href)
if room:
username = po.Portal.get_username_from_mx_alias(room.group(1))
portal = po.Portal.find_by_username(username)
if portal and portal.username:
return TelegramMessage(f"@{portal.username}").format(Mention)
return (msg.format(URL)
if msg.text == href
else msg.format(TextURL, url=href))
@classmethod @classmethod
def blockquote_to_tmessage(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage: def room_pill_to_fstring(cls, msg: TelegramMessage, room_id: RoomID) -> TelegramMessage:
username = po.Portal.get_username_from_mx_alias(room_id)
portal = po.Portal.find_by_username(username)
if portal and portal.username:
return TelegramMessage(f"@{portal.username}").format(TelegramEntityType.MENTION)
@classmethod
def header_to_fstring(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage:
    """Render a heading node as bold text prefixed with ``#`` characters.

    The number of ``#`` characters is the digit found at index 1 of the tag
    name, followed by a single space.
    """
    parts = cls.node_to_fstrings(node, ctx)
    level = int(node.tag[1])
    marker = "#" * level + " "
    heading = TelegramMessage.join(parts, "")
    return heading.prepend(marker).format(TelegramEntityType.BOLD)
@classmethod
def blockquote_to_fstring(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage:
msg = cls.tag_aware_parse_node(node, ctx) msg = cls.tag_aware_parse_node(node, ctx)
children = msg.trim().split("\n") children = msg.trim().split("\n")
children = [child.prepend("> ") for child in children] children = [child.prepend("> ") for child in children]
return TelegramMessage.join(children, "\n") return TelegramMessage.join(children, "\n")
@classmethod
def node_to_tmessage(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage:
    """Convert one HTML node into a TelegramMessage by dispatching on its tag.

    :param node: The node to convert.
    :param ctx: The recursion context passed down to the per-tag converters.
    :return: The converted message. Tags without a dedicated branch fall back
             to ``tag_aware_parse_node``.
    """
    tag = node.tag
    if tag == "mx-reply":
        return TelegramMessage("")
    elif tag == "ol":
        return cls.list_to_tmessage(node, ctx)
    elif tag == "ul":
        return cls.list_to_tmessage(node, ctx.enter_list())
    elif tag in ("h1", "h2", "h3", "h4", "h5", "h6"):
        return cls.header_to_tmessage(node, ctx)
    elif tag == "br":
        return TelegramMessage("\n")
    # "strike" is included so that every tag basic_format_to_tmessage knows how
    # to format actually reaches it.
    elif tag in ("b", "strong", "i", "em", "s", "strike", "del", "u", "ins", "command"):
        return cls.basic_format_to_tmessage(node, ctx)
    elif tag == "blockquote":
        # Telegram already has blockquote entities in the protocol schema, but it strips them
        # server-side and none of the official clients support them.
        # TODO once Telegram changes that, use the above if block for blockquotes too.
        return cls.blockquote_to_tmessage(node, ctx)
    elif tag == "a":
        return cls.link_to_tstring(node, ctx)
    elif tag == "p":
        return cls.tag_aware_parse_node(node, ctx).append("\n")
    elif tag == "pre":
        lang = ""
        try:
            if node[0].tag == "code":
                node = node[0]
                # The attribute may be absent or value-less (None); only a
                # "language-<name>" class carries a language.
                css_class = node.attrib.get("class") or ""
                if css_class.startswith("language-"):
                    lang = css_class[len("language-"):]
        except IndexError:
            # <pre> without any child element: keep the node and no language.
            pass
        return cls.parse_node(node, ctx.enter_code_block()).format(Pre, language=lang)
    elif tag == "code":
        return cls.parse_node(node, ctx.enter_code_block()).format(Code)
    return cls.tag_aware_parse_node(node, ctx)
@staticmethod
def text_to_tmessage(text: str, ctx: RecursionContext) -> TelegramMessage:
    """Wrap raw text in a TelegramMessage, removing newlines when ``ctx.strip_linebreaks`` is set."""
    cleaned = text.replace("\n", "") if ctx.strip_linebreaks else text
    return TelegramMessage(cleaned)
@classmethod
def node_to_tagged_tmessages(cls, node: HTMLNode, ctx: RecursionContext
                             ) -> List[Tuple[TelegramMessage, str]]:
    """Convert the contents of ``node`` into ``(message, tag)`` pairs in document order.

    Text pieces (the node's own ``text`` and each child's ``tail``) are tagged
    ``"text"``; child elements are tagged with their own tag name. Empty text
    pieces are left out.
    """
    tagged = [(cls.text_to_tmessage(node.text, ctx), "text")] if node.text else []
    for element in node:
        tagged.append((cls.node_to_tmessage(element, ctx), element.tag))
        trailing = element.tail
        if trailing:
            tagged.append((cls.text_to_tmessage(trailing, ctx), "text"))
    return tagged
@classmethod
def node_to_tmessages(cls, node: HTMLNode, ctx: RecursionContext
                      ) -> List[TelegramMessage]:
    """Return the messages from ``node_to_tagged_tmessages`` with the tags dropped."""
    tagged = cls.node_to_tagged_tmessages(node, ctx)
    return [pair[0] for pair in tagged]
@classmethod
def tag_aware_parse_node(cls, node: HTMLNode, ctx: RecursionContext
) -> TelegramMessage:
# Concatenate the converted contents of `node` into one message, surrounding
# pieces whose tag is listed in cls.block_tags with newlines, then trim the
# result.
# NOTE(review): indentation is lost in this view, so it is not visible whether
# `prev_was_block = True` belongs to the outer `if tag in cls.block_tags` or to
# the inner `if not prev_was_block` - confirm against the real file.
msgs = cls.node_to_tagged_tmessages(node, ctx)
output = TelegramMessage()
prev_was_block = False
for msg, tag in msgs:
if tag in cls.block_tags:
# A block element always gets a trailing newline.
msg = msg.append("\n")
if not prev_was_block:
msg = msg.prepend("\n")
# Nothing in this method ever sets the flag back to False.
prev_was_block = True
output = output.append(msg)
return output.trim()
@classmethod
def parse_node(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage:
    """Convert the contents of ``node`` and join the pieces with the default separator of ``TelegramMessage.join``."""
    pieces = cls.node_to_tmessages(node, ctx)
    return TelegramMessage.join(pieces)
@classmethod
def parse(cls, data: str) -> ParsedMessage:
    """Parse an HTML string into a ``(text, entities)`` tuple.

    The input is wrapped in a ``<body>`` element before being read, and the
    resulting tree is converted starting from a fresh ``RecursionContext``.
    """
    root = read_html(f"<body>{data}</body>")
    parsed = cls.node_to_tmessage(root, RecursionContext())
    return parsed.text, parsed.entities
@@ -13,145 +13,84 @@
# #
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>. # along with this program. If not, see <https://www.gnu.org/licenses/>.
from typing import Callable, List, Optional, Sequence, Type, Union from typing import Optional, Union, Any, List, Type, Dict
from enum import Enum
from telethon.tl.types import (MessageEntityMentionName as MentionName, from telethon.tl.types import (MessageEntityMention as Mention, MessageEntityBotCommand as Command,
MessageEntityTextUrl as TextURL, MessageEntityPre as Pre, MessageEntityMentionName as MentionName, MessageEntityUrl as URL,
TypeMessageEntity, InputMessageEntityMentionName as InputMentionName) MessageEntityEmail as Email, MessageEntityTextUrl as TextURL,
MessageEntityBold as Bold, MessageEntityItalic as Italic,
MessageEntityCode as Code, MessageEntityPre as Pre,
MessageEntityStrike as Strike, MessageEntityUnderline as Underline,
MessageEntityBlockquote as Blockquote, TypeMessageEntity,
InputMessageEntityMentionName as InputMentionName)
from mautrix.util.formatter import EntityString, SemiAbstractEntity
class Entity: class TelegramEntityType(Enum):
@staticmethod """EntityType is a Matrix formatting entity type."""
def copy(entity: TypeMessageEntity) -> Optional[TypeMessageEntity]: BOLD = Bold
if not entity: ITALIC = Italic
return None STRIKETHROUGH = Strike
kwargs = { UNDERLINE = Underline
"offset": entity.offset, URL = URL
"length": entity.length, INLINE_URL = TextURL
} EMAIL = Email
if isinstance(entity, Pre): PREFORMATTED = Pre
kwargs["language"] = entity.language INLINE_CODE = Code
elif isinstance(entity, TextURL): BLOCKQUOTE = Blockquote
kwargs["url"] = entity.url MENTION = Mention
elif isinstance(entity, (MentionName, InputMentionName)): MENTION_NAME = MentionName
kwargs["user_id"] = entity.user_id COMMAND = Command
return entity.__class__(**kwargs)
@classmethod USER_MENTION = 1
def adjust(cls, entity: Union[TypeMessageEntity, List[TypeMessageEntity]], ROOM_MENTION = 2
func: Callable[[TypeMessageEntity], None] HEADER = 3
) -> Union[Optional[TypeMessageEntity], List[TypeMessageEntity]]:
if isinstance(entity, list):
return [Entity.adjust(element, func) for element in entity if entity]
elif not entity:
return None
entity = cls.copy(entity)
func(entity)
if entity.offset < 0:
entity.length += entity.offset
entity.offset = 0
return entity
def offset_diff(amount: int) -> Callable[[TypeMessageEntity], None]: class TelegramEntity(SemiAbstractEntity):
def func(entity: TypeMessageEntity) -> None: internal: TypeMessageEntity
entity.offset += amount
return func def __init__(self, type: Union[TelegramEntityType, Type[TypeMessageEntity]],
offset: int, length: int, extra_info: Dict[str, Any]) -> None:
if isinstance(type, TelegramEntityType):
if isinstance(type.value, int):
raise ValueError(f"Can't create Entity with non-Telegram EntityType {type}")
type = type.value
self.internal = type(offset=offset, length=length, **extra_info)
def copy(self) -> Optional['TelegramEntity']:
extra_info = {}
if isinstance(self.internal, Pre):
extra_info["language"] = self.internal.language
elif isinstance(self.internal, TextURL):
extra_info["url"] = self.internal.url
elif isinstance(self.internal, (MentionName, InputMentionName)):
extra_info["user_id"] = self.internal.user_id
return TelegramEntity(type(self.internal), offset=self.internal.offset,
length=self.internal.length, extra_info=extra_info)
@property
def offset(self) -> int:
return self.internal.offset
@offset.setter
def offset(self, value: int) -> None:
self.internal.offset = value
@property
def length(self) -> int:
return self.internal.length
@length.setter
def length(self, value: int) -> None:
self.internal.length = value
def offset_length_multiply(amount: int) -> Callable[[TypeMessageEntity], None]: class TelegramMessage(EntityString[TelegramEntity, TelegramEntityType]):
def func(entity: TypeMessageEntity) -> None: entity_class = TelegramEntity
entity.offset *= amount
entity.length *= amount
return func @property
def telegram_entities(self) -> List[TypeMessageEntity]:
return [entity.internal for entity in self.entities]
# A piece of text together with the Telegram message entities that apply to it.
# NOTE(review): indentation is lost in this view; the nesting described in the
# comments below is inferred from the statements themselves - confirm against
# the real file.
class TelegramMessage:
def __init__(self, text: str = "", entities: Optional[List[TypeMessageEntity]] = None) -> None:
self.text = text # type: str
# A missing (or empty) entity list is replaced with a fresh list.
self.entities = entities or [] # type: List[TypeMessageEntity]
# Shift every entity by `offset`, working on copies; returns self.
def offset_entities(self, offset: int) -> 'TelegramMessage':
def apply_offset(entity: TypeMessageEntity, inner_offset: int
) -> Optional[TypeMessageEntity]:
entity = Entity.copy(entity)
entity.offset += inner_offset
# Negative offsets are clamped to 0, entities starting past the end of the text
# are dropped (None), and entities running past the end are shortened.
if entity.offset < 0:
entity.offset = 0
elif entity.offset > len(self.text):
return None
elif entity.offset + entity.length > len(self.text):
entity.length = len(self.text) - entity.offset
return entity
self.entities = [apply_offset(entity, offset) for entity in self.entities if entity]
self.entities = [x for x in self.entities if x is not None]
return self
# Append each argument (plain strings are wrapped first) to this message in
# place, shifting the appended entities by the current text length; returns self.
def append(self, *args: Union[str, 'TelegramMessage']) -> 'TelegramMessage':
for msg in args:
if isinstance(msg, str):
msg = TelegramMessage(text=msg)
self.entities += Entity.adjust(msg.entities, offset_diff(len(self.text)))
self.text += msg.text
return self
# Put each argument in front of this message in place, shifting the existing
# entities by the length of the prepended text; returns self.
def prepend(self, *args: Union[str, 'TelegramMessage']) -> 'TelegramMessage':
for msg in args:
if isinstance(msg, str):
msg = TelegramMessage(text=msg)
self.entities = msg.entities + Entity.adjust(self.entities, offset_diff(len(msg.text)))
self.text = msg.text + self.text
return self
# Add an entity of `entity_type`. The offset defaults to 0 and the length to the
# whole current text; extra keyword arguments go to the entity constructor.
def format(self, entity_type: Type[TypeMessageEntity], offset: int = None, length: int = None,
**kwargs) -> 'TelegramMessage':
self.entities.append(entity_type(offset=offset or 0,
length=length if length is not None else len(self.text),
**kwargs))
return self
# Build a new message from this one followed by the arguments.
def concat(self, *args: Union[str, 'TelegramMessage']) -> 'TelegramMessage':
return TelegramMessage().append(self, *args)
# Strip whitespace from both ends of the text in place and shift the entities
# left by the number of leading characters removed; returns self.
def trim(self) -> 'TelegramMessage':
orig_len = len(self.text)
self.text = self.text.lstrip()
diff = orig_len - len(self.text)
self.text = self.text.rstrip()
self.offset_entities(-diff)
return self
# Split the text on `separator` into separate messages.
def split(self, separator, max_items: int = 0) -> List['TelegramMessage']:
# str.split receives max_items - 1 as its maxsplit, so the default of 0 becomes -1.
text_parts = self.text.split(separator, max_items - 1)
output = [] # type: List[TelegramMessage]
offset = 0
for part in text_parts:
msg = TelegramMessage(part)
for entity in self.entities:
# An entity is copied into a part only when both its start and its end fall
# inside that part.
start_in_range = len(part) > entity.offset - offset >= 0
end_in_range = len(part) >= entity.offset - offset + entity.length > 0
if start_in_range and end_in_range:
msg.entities.append(Entity.adjust(entity, offset_diff(-offset)))
output.append(msg)
# Advance past this part and the separator that followed it.
offset += len(part)
offset += len(separator)
return output
# Concatenate `items` (strings or messages) with `separator` between them into a
# new message, shifting each item's entities to its position in the result.
@staticmethod
def join(items: Sequence[Union[str, 'TelegramMessage']],
separator: str = " ") -> 'TelegramMessage':
main = TelegramMessage()
for msg in items:
if isinstance(msg, str):
msg = TelegramMessage(text=msg)
main.entities += Entity.adjust(msg.entities, offset_diff(len(main.text)))
main.text += msg.text + separator
# Remove the separator that was added after the last item.
if len(separator) > 0:
main.text = main.text[:-len(separator)]
return main
+7 -8
View File
@@ -25,6 +25,7 @@ from telethon.tl.types import (MessageEntityMention, MessageEntityMentionName, M
MessageEntityPhone, TypeMessageEntity, Message, PeerChannel, MessageEntityPhone, TypeMessageEntity, Message, PeerChannel,
MessageEntityBlockquote, MessageEntityStrike, MessageFwdHeader, MessageEntityBlockquote, MessageEntityStrike, MessageFwdHeader,
MessageEntityUnderline, PeerUser) MessageEntityUnderline, PeerUser)
from telethon.helpers import add_surrogate, del_surrogate
from mautrix.errors import MatrixRequestError from mautrix.errors import MatrixRequestError
from mautrix.appservice import IntentAPI from mautrix.appservice import IntentAPI
@@ -34,7 +35,6 @@ from mautrix.types import (TextMessageEventContent, RelatesTo, RelationType, For
from .. import user as u, puppet as pu, portal as po from .. import user as u, puppet as pu, portal as po
from ..types import TelegramID from ..types import TelegramID
from ..db import Message as DBMessage from ..db import Message as DBMessage
from .util import (add_surrogates, remove_surrogates)
if TYPE_CHECKING: if TYPE_CHECKING:
from ..abstract_user import AbstractUser from ..abstract_user import AbstractUser
@@ -136,7 +136,7 @@ async def telegram_to_matrix(evt: Message, source: "AbstractUser",
no_reply_fallback: bool = False) -> TextMessageEventContent: no_reply_fallback: bool = False) -> TextMessageEventContent:
content = TextMessageEventContent( content = TextMessageEventContent(
msgtype=MessageType.TEXT, msgtype=MessageType.TEXT,
body=add_surrogates(override_text or evt.message), body=add_surrogate(override_text or evt.message),
) )
entities = override_entities or evt.entities entities = override_entities or evt.entities
if entities: if entities:
@@ -163,11 +163,10 @@ async def telegram_to_matrix(evt: Message, source: "AbstractUser",
content.body += f"\n- {evt.post_author}" content.body += f"\n- {evt.post_author}"
content.formatted_body += f"<br/><i>- <u>{evt.post_author}</u></i>" content.formatted_body += f"<br/><i>- <u>{evt.post_author}</u></i>"
if content.formatted_body: content.body = del_surrogate(content.body)
content.formatted_body = content.formatted_body.replace("\n", "<br/>")
content.body = remove_surrogates(content.body) if content.formatted_body:
content.formatted_body = remove_surrogates(content.formatted_body) content.formatted_body = del_surrogate(content.formatted_body.replace("\n", "<br/>"))
return content return content
@@ -284,8 +283,8 @@ def _parse_name_mention(html: List[str], entity_text: str, user_id: TelegramID)
return False return False
message_link_regex = re.compile( message_link_regex = re.compile(r"https?://t(?:elegram)?\.(?:me|dog)/"
r"https?://t(?:elegram)?\.(?:me|dog)/([A-Za-z][A-Za-z0-9_]{3,}[A-Za-z0-9])/([0-9]{1,50})") r"([A-Za-z][A-Za-z0-9_]{3,}[A-Za-z0-9])/([0-9]{1,50})")
def _parse_url(html: List[str], entity_text: str, url: str) -> bool: def _parse_url(html: List[str], entity_text: str, url: str) -> bool:
-34
View File
@@ -1,34 +0,0 @@
# mautrix-telegram - A Matrix-Telegram puppeting bridge
# Copyright (C) 2019 Tulir Asokan
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from typing import Optional, Pattern
import struct
import re
# add_surrogates and remove_surrogates are unicode surrogate utility functions from Telethon.
# Licensed under the MIT license.
# https://github.com/LonamiWebs/Telethon/blob/7cce7aa3e4c6c7019a55530391b1761d33e5a04e/telethon/helpers.py
def add_surrogates(text: Optional[str]) -> Optional[str]:
    """Replace every character in the range U+10000..U+10FFFF with its UTF-16 surrogate pair.

    Each such character is encoded as little-endian UTF-16 and the two
    resulting 16-bit units are emitted as two separate characters. All other
    characters are copied unchanged. ``None`` is passed through as ``None``.
    """
    if text is None:
        return None
    pieces = []
    for char in text:
        if 0x10000 <= ord(char) <= 0x10FFFF:
            units = struct.unpack("<HH", char.encode("utf-16-le"))
            pieces.append("".join(chr(unit) for unit in units))
        else:
            pieces.append(char)
    return "".join(pieces)
def remove_surrogates(text: Optional[str]) -> Optional[str]:
    """Round-trip ``text`` through UTF-16 so that surrogate pairs become single characters.

    The text is encoded with the ``surrogatepass`` error handler, which lets
    surrogate code points through, and then decoded again as UTF-16. ``None``
    is passed through as ``None``.
    """
    if text is None:
        return None
    utf16_bytes = text.encode("utf-16", "surrogatepass")
    return utf16_bytes.decode("utf-16")
-1
View File
@@ -1,4 +1,3 @@
from .file_transfer import transfer_file_to_matrix, convert_image from .file_transfer import transfer_file_to_matrix, convert_image
from .format_duration import format_duration from .format_duration import format_duration
from .signed_token import sign_token, verify_token
from .recursive_dict import recursive_del, recursive_set, recursive_get from .recursive_dict import recursive_del, recursive_set, recursive_get
+6 -5
View File
@@ -38,6 +38,7 @@ try:
from PIL import Image from PIL import Image
except ImportError: except ImportError:
Image = None Image = None
try: try:
from moviepy.editor import VideoFileClip from moviepy.editor import VideoFileClip
import random import random
@@ -47,7 +48,7 @@ try:
except ImportError: except ImportError:
VideoFileClip = random = string = os = mimetypes = None VideoFileClip = random = string = os = mimetypes = None
log = logging.getLogger("mau.util") # type: logging.Logger log: logging.Logger = logging.getLogger("mau.util")
TypeLocation = Union[Document, InputDocumentFileLocation, InputPeerPhotoFileLocation, TypeLocation = Union[Document, InputDocumentFileLocation, InputPeerPhotoFileLocation,
InputFileLocation, InputPhotoFileLocation] InputFileLocation, InputPhotoFileLocation]
@@ -59,7 +60,7 @@ def convert_image(file: bytes, source_mime: str = "image/webp", target_type: str
if not Image: if not Image:
return source_mime, file, None, None return source_mime, file, None, None
try: try:
image = Image.open(BytesIO(file)).convert("RGBA") # type: Image.Image image: Image.Image = Image.open(BytesIO(file)).convert("RGBA")
if thumbnail_to: if thumbnail_to:
image.thumbnail(thumbnail_to, Image.ANTIALIAS) image.thumbnail(thumbnail_to, Image.ANTIALIAS)
new_file = BytesIO() new_file = BytesIO()
@@ -134,7 +135,7 @@ async def transfer_thumbnail_to_matrix(client: MautrixTelegramClient, intent: In
width, height = None, None width, height = None, None
mime_type = magic.from_buffer(file, mime=True) mime_type = magic.from_buffer(file, mime=True)
content_uri = await intent.upload_file(file, mime_type) content_uri = await intent.upload_media(file, mime_type)
db_file = DBTelegramFile(id=loc_id, mxc=content_uri, mime_type=mime_type, db_file = DBTelegramFile(id=loc_id, mxc=content_uri, mime_type=mime_type,
was_converted=False, timestamp=int(time.time()), size=len(file), was_converted=False, timestamp=int(time.time()), size=len(file),
@@ -148,7 +149,7 @@ async def transfer_thumbnail_to_matrix(client: MautrixTelegramClient, intent: In
return db_file return db_file
transfer_locks = {} # type: Dict[str, asyncio.Lock] transfer_locks: Dict[str, asyncio.Lock] = {}
TypeThumbnail = Optional[Union[TypeLocation, TypePhotoSize]] TypeThumbnail = Optional[Union[TypeLocation, TypePhotoSize]]
@@ -202,7 +203,7 @@ async def _unlocked_transfer_file_to_matrix(client: MautrixTelegramClient, inten
mime_type = new_mime_type mime_type = new_mime_type
thumbnail = None thumbnail = None
content_uri = await intent.upload_file(file, mime_type) content_uri = await intent.upload_media(file, mime_type)
db_file = DBTelegramFile(id=loc_id, mxc=content_uri, db_file = DBTelegramFile(id=loc_id, mxc=content_uri,
mime_type=mime_type, was_converted=image_converted, mime_type=mime_type, was_converted=image_converted,
-52
View File
@@ -1,52 +0,0 @@
# mautrix-telegram - A Matrix-Telegram puppeting bridge
# Copyright (C) 2019 Tulir Asokan
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from typing import Dict, Optional
import json
import base64
import hashlib
def _get_checksum(key: str, payload: bytes) -> str:
    """Return the hex SHA-256 digest of ``payload`` followed by the UTF-8 encoded ``key``."""
    hasher = hashlib.sha256(payload)
    hasher.update(key.encode("utf-8"))
    return hasher.hexdigest()


def sign_token(key: str, payload: Dict) -> str:
    """Serialize ``payload`` into a signed token string.

    The payload is dumped to JSON and URL-safe base64 encoded; the token is
    ``<checksum>:<base64 payload>``, where the checksum is computed over the
    base64 bytes and ``key``.

    :param key: The secret used for the checksum.
    :param payload: A JSON-serializable dict.
    :return: The signed token.
    """
    payload_b64 = base64.urlsafe_b64encode(json.dumps(payload).encode("utf-8"))
    checksum = _get_checksum(key, payload_b64)
    return f"{checksum}:{payload_b64.decode('utf-8')}"


def verify_token(key: str, data: str) -> Optional[Dict]:
    """Check the checksum of a token made by ``sign_token`` and return its payload.

    :param key: The secret the token is expected to be signed with.
    :param data: The token string, ``<checksum>:<base64 payload>``.
    :return: The decoded JSON payload, or ``None`` when the token is empty, has
             no ``:`` separator, carries a wrong checksum, or holds a payload
             that is not valid base64, UTF-8 or JSON.
    """
    # Imported locally so this function does not depend on a module-level import.
    import hmac

    if not data:
        return None
    checksum, separator, payload = data.partition(":")
    if not separator:
        return None
    try:
        checksum_bytes = checksum.encode("utf-8")
        payload_bytes = payload.encode("utf-8")
    except UnicodeEncodeError:
        # Unencodable input (e.g. lone surrogates) cannot be a valid token.
        return None
    expected = _get_checksum(key, payload_bytes).encode("utf-8")
    # Constant-time comparison; bytes are compared because compare_digest only
    # accepts ASCII-only str arguments.
    if not hmac.compare_digest(checksum_bytes, expected):
        return None
    try:
        return json.loads(base64.urlsafe_b64decode(payload).decode("utf-8"))
    except ValueError:
        # binascii.Error, UnicodeDecodeError and json.JSONDecodeError are all
        # ValueError subclasses: a correctly signed but undecodable payload
        # yields None instead of an exception.
        return None
+1 -1
View File
@@ -25,8 +25,8 @@ from aiohttp import web
import pkg_resources import pkg_resources
from mautrix.types import UserID from mautrix.types import UserID
from mautrix.util.signed_token import sign_token, verify_token
from ...util import sign_token, verify_token
from ...user import User from ...user import User
from ...puppet import Puppet from ...puppet import Puppet
from ..common import AuthAPI from ..common import AuthAPI