Switch from SQLAlchemy to asyncpg/aiosqlite

This commit is contained in:
Tulir Asokan
2021-12-20 22:39:09 +02:00
parent f12f3fe007
commit 89ab29ea5f
61 changed files with 4681 additions and 4628 deletions
@@ -1,5 +1,5 @@
# mautrix-telegram - A Matrix-Telegram puppeting bridge
# Copyright (C) 2019 Tulir Asokan
# Copyright (C) 2021 Tulir Asokan
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
@@ -13,39 +13,77 @@
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from typing import Optional, List, Tuple, Callable, Pattern, Match, TYPE_CHECKING
import re
import logging
from __future__ import annotations
from telethon.tl.types import (MessageEntityMention, MessageEntityMentionName, MessageEntityItalic,
TypeMessageEntity, InputMessageEntityMentionName)
import re
from telethon.tl.types import MessageEntityItalic, TypeMessageEntity
from telethon.helpers import add_surrogate, del_surrogate
from telethon import TelegramClient
from mautrix.types import RoomID, MessageEventContent
from mautrix.util.logging import TraceLogger
from ... import puppet as pu
from ...types import TelegramID
from ...db import Message as DBMessage
from .parser import ParsedMessage, parse_html
from .parser import MatrixParser
if TYPE_CHECKING:
from ...context import Context
# Module logger ("mau.fmt.mx").
log: TraceLogger = logging.getLogger("mau.fmt.mx")
# Set by init_mx() from the "bridge.plaintext_highlights" config value.
should_bridge_plaintext_highlights: bool = False
# Matches a leading "!command"; the converters rewrite it into a command.
command_regex: Pattern = re.compile(r"^!([A-Za-z0-9@]+)")
# Matches a leading backslash-escaped "\!command"; the converters drop only the backslash.
not_command_regex: Pattern = re.compile(r"^\\(![A-Za-z0-9@]+)")
# Compiled by init_mx() from the "bridge.displayname_template" config value.
plain_mention_regex: Optional[Pattern] = None
# Un-annotated definitions of the same two patterns (the regexes are identical to the ones above).
command_regex = re.compile(r"^!([A-Za-z0-9@]+)")
not_command_regex = re.compile(r"^\\(![A-Za-z0-9@]+)")
# Messages longer than this many characters are truncated by _cut_long_message().
MAX_LENGTH = 4096
# Marker appended to a message that was truncated.
CUTOFF_TEXT = " [message cut]"
# Number of characters kept from a truncated message so that, with the marker
# appended, the result is exactly MAX_LENGTH long.
CUT_MAX_LENGTH = MAX_LENGTH - len(CUTOFF_TEXT)
def _cut_long_message(message: str, entities: List[TypeMessageEntity]) -> ParsedMessage:
class FormatError(Exception):
    """Raised when Matrix-formatted content cannot be converted to Telegram format."""
async def matrix_reply_to_telegram(
    content: MessageEventContent, tg_space: TelegramID, room_id: RoomID | None = None
) -> TelegramID | None:
    """Resolve the Telegram message ID that a Matrix event replies to.

    Args:
        content: The Matrix event content; its reply fallback is trimmed in place
            when the event is a reply.
        tg_space: Passed through to the message lookup.
        room_id: Passed through to the message lookup.

    Returns:
        The ``tgid`` of the stored message matching the replied-to event, or None
        when the event is not a reply or no stored message is found.
    """
    reply_to = content.get_reply_to()
    if not reply_to:
        return None
    content.trim_reply_fallback()
    db_message = await DBMessage.get_by_mxid(reply_to, room_id, tg_space)
    return db_message.tgid if db_message else None
async def matrix_to_telegram(
    client: TelegramClient, *, text: str | None = None, html: str | None = None
) -> tuple[str, list[TypeMessageEntity]]:
    """Convert Matrix message content into Telegram text and entities.

    ``html`` takes precedence over ``text`` when both are given. Plain text never
    produces entities.

    Raises:
        ValueError: if neither ``text`` nor ``html`` is provided.
    """
    if html is None and text is None:
        raise ValueError("text or html must be provided to convert formatting")
    if html is not None:
        return await _matrix_html_to_telegram(client, html)
    return _matrix_text_to_telegram(text), []
async def _matrix_html_to_telegram(
    client: TelegramClient, html: str
) -> tuple[str, list[TypeMessageEntity]]:
    """Convert Matrix HTML into Telegram text and message entities.

    Raises:
        FormatError: if any step of the conversion fails; the underlying exception
            is chained as the cause.
    """
    try:
        # A leading "!cmd" is wrapped in a <command> node for the parser.
        html = command_regex.sub(r"<command>\1</command>", html)
        html = html.replace("\t", " " * 4)
        # A leading "\!cmd" is an escaped command: only the backslash is removed.
        html = not_command_regex.sub(r"\1", html)
        message = await MatrixParser(client).parse(add_surrogate(html))
        stripped = del_surrogate(message.text.strip())
        stripped, entities = _cut_long_message(stripped, message.telegram_entities)
        return stripped, entities
    except Exception as e:
        raise FormatError(f"Failed to convert Matrix format: {html}") from e
def _cut_long_message(
message: str, entities: list[TypeMessageEntity]
) -> tuple[str, list[TypeMessageEntity]]:
if len(message) > MAX_LENGTH:
message = message[0:CUT_MAX_LENGTH] + CUTOFF_TEXT
new_entities = []
@@ -60,112 +98,8 @@ def _cut_long_message(message: str, entities: List[TypeMessageEntity]) -> Parsed
return message, entities
class FormatError(Exception):
    """Raised when Matrix-formatted content cannot be converted to Telegram format."""
def matrix_reply_to_telegram(content: MessageEventContent, tg_space: TelegramID,
                             room_id: Optional[RoomID] = None) -> Optional[TelegramID]:
    """Resolve the Telegram message ID that a Matrix event replies to (synchronous lookup).

    Trims the reply fallback from ``content`` in place when the event is a reply.
    Returns None when the event is not a reply or no stored message matches it.
    """
    reply_to = content.get_reply_to()
    if not reply_to:
        return None
    content.trim_reply_fallback()
    db_message = DBMessage.get_by_mxid(reply_to, room_id, tg_space)
    return db_message.tgid if db_message else None
async def matrix_to_telegram(client: TelegramClient, *, text: Optional[str] = None,
                             html: Optional[str] = None) -> ParsedMessage:
    """Convert Matrix message content into Telegram text and entities.

    ``html`` takes precedence over ``text`` when both are given. After conversion,
    name mentions in the entity list are resolved through ``client``.

    Raises:
        ValueError: if neither ``text`` nor ``html`` is provided.
    """
    if html is None and text is None:
        raise ValueError("text or html must be provided to convert formatting")
    if html is not None:
        converted = _matrix_html_to_telegram(html)
    else:
        converted = _matrix_text_to_telegram(text)
    text, entities = converted
    await _fix_name_mentions(client, entities)
    return text, entities
def _matrix_html_to_telegram(html: str) -> ParsedMessage:
    """Convert Matrix HTML into Telegram text and message entities.

    Raises:
        FormatError: if any step of the conversion fails; the underlying exception
            is chained as the cause.
    """
    try:
        # A leading "!cmd" is wrapped in a <command> node for the parser.
        html = command_regex.sub(r"<command>\1</command>", html)
        html = html.replace("\t", " " * 4)
        # A leading "\!cmd" is an escaped command: only the backslash is removed.
        html = not_command_regex.sub(r"\1", html)
        if should_bridge_plaintext_highlights:
            html = plain_mention_regex.sub(_plain_mention_to_html, html)
        plain, parsed_entities = parse_html(add_surrogate(html))
        plain = del_surrogate(plain.strip())
        plain, parsed_entities = _cut_long_message(plain, parsed_entities)
        return plain, parsed_entities
    except Exception as e:
        raise FormatError(f"Failed to convert Matrix format: {html}") from e
# Plain-text conversion of a Matrix message. Two signatures appear here: the first is
# annotated as returning ParsedMessage (text plus entities), the second as returning
# only the converted string.
def _matrix_text_to_telegram(text: str) -> ParsedMessage:
def _matrix_text_to_telegram(text: str) -> str:
# A leading "!cmd" becomes "/cmd".
text = command_regex.sub(r"/\1", text)
# Tabs are expanded to four spaces.
text = text.replace("\t", " " * 4)
# A leading "\!cmd" is an escaped command: only the backslash is removed.
text = not_command_regex.sub(r"\1", text)
# With plaintext highlights enabled, displayname mentions are replaced through
# plain_mention_regex and the resulting entities collected; otherwise there are none.
if should_bridge_plaintext_highlights:
entities, pmr_replacer = _plain_mention_to_text()
text = plain_mention_regex.sub(pmr_replacer, text)
else:
entities = []
return text, entities
async def _fix_name_mentions(client: TelegramClient, entities: List[TypeMessageEntity]) -> None:
for index in reversed(range(len(entities))):
entity = entities[index]
if isinstance(entity, (MessageEntityMentionName, InputMessageEntityMentionName)):
try:
user = await client.get_input_entity(entity.user_id)
except (ValueError, TypeError) as e:
log.trace(f"Dropping mention of {entity.user_id}: {e}")
del entities[index]
else:
entities[index] = InputMessageEntityMentionName(entity.offset, entity.length, user)
def _plain_mention_to_text() -> Tuple[List[TypeMessageEntity], Callable[[Match], str]]:
entities = []
def replacer(match: Match) -> str:
puppet = pu.Puppet.find_by_displayname(match.group(2))
if puppet:
offset = match.start()
length = match.end() - offset
if puppet.username:
entity = MessageEntityMention(offset, length)
text = f"@{puppet.username}"
else:
entity = MessageEntityMentionName(offset, length, user_id=puppet.tgid)
text = puppet.displayname
entities.append(entity)
return text
return "".join(match.groups())
return entities, replacer
def _plain_mention_to_html(match: Match) -> str:
    """Regex replacer that turns a plaintext displayname mention into a matrix.to link.

    When the second capture group is the displayname of a known puppet, the result
    is the first capture group followed by an ``<a>`` element linking to the
    puppet's MXID. Otherwise the matched groups are put back unchanged.
    """
    puppet = pu.Puppet.find_by_displayname(match.group(2))
    if not puppet:
        return "".join(match.groups())
    prefix = match.group(1)
    return f"{prefix}<a href='https://matrix.to/#/{puppet.mxid}'>{puppet.displayname}</a>"
def init_mx(context: "Context") -> None:
global plain_mention_regex, should_bridge_plaintext_highlights
config = context.config
dn_template = config["bridge.displayname_template"]
dn_template = re.escape(dn_template).replace(re.escape("{displayname}"), "[^>]+")
plain_mention_regex = re.compile(f"^({dn_template})")
should_bridge_plaintext_highlights = config["bridge.plaintext_highlights"]
return text
@@ -1,5 +1,5 @@
# mautrix-telegram - A Matrix-Telegram puppeting bridge
# Copyright (C) 2019 Tulir Asokan
# Copyright (C) 2021 Tulir Asokan
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
@@ -13,77 +13,80 @@
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from typing import List, Tuple, Optional
from __future__ import annotations
from telethon.tl.types import TypeMessageEntity
import logging
from telethon import TelegramClient
from mautrix.types import UserID, RoomID
from mautrix.util.formatter import MatrixParser as BaseMatrixParser, RecursionContext
from mautrix.util.formatter.html_reader_htmlparser import read_html, HTMLNode
from mautrix.util.logging import TraceLogger
from ... import user as u, puppet as pu, portal as po
from .telegram_message import TelegramMessage, TelegramEntityType
ParsedMessage = Tuple[str, List[TypeMessageEntity]]
def parse_html(input_html: str) -> ParsedMessage:
    """Parse Matrix HTML and return the resulting text with its Telegram entities."""
    parsed = MatrixParser.parse(input_html)
    return parsed.text, parsed.telegram_entities
log: TraceLogger = logging.getLogger("mau.fmt.mx")
class MatrixParser(BaseMatrixParser[TelegramMessage]):
    """Matrix HTML parser that produces :class:`TelegramMessage` objects.

    Provides the conversions for command nodes, user and room pills, links,
    headers and blockquotes. An instance is bound to a Telegram client, which is
    used to resolve users mentioned by ID.
    """

    e = TelegramEntityType
    fs = TelegramMessage
    read_html = read_html
    client: TelegramClient

    def __init__(self, client: TelegramClient) -> None:
        """Bind the parser to the Telegram client used for resolving mentions."""
        self.client = client
        self.read_html = read_html

    async def custom_node_to_fstring(
        self, node: HTMLNode, ctx: RecursionContext
    ) -> TelegramMessage | None:
        """Convert a ``<command>`` node into a bot command; return None for other tags."""
        msg = await self.tag_aware_parse_node(node, ctx)
        if node.tag == "command":
            # The formatted message has to be returned, otherwise the command entity
            # is silently discarded. The "!" prefix was stripped when the node was
            # created, so the command gets a "/" prefix here, the same form the
            # plain-text conversion produces.
            return msg.prepend("/").format(TelegramEntityType.COMMAND)
        return None

    async def user_pill_to_fstring(self, msg: TelegramMessage, user_id: UserID) -> TelegramMessage:
        """Convert a Matrix user pill into a Telegram mention.

        Users with a Telegram username become ``@username`` mentions. Users known
        only by Telegram ID become name mentions on their displayname; if the ID
        cannot be resolved to an input entity, the displayname is kept as plain
        text. Unknown users leave ``msg`` unchanged.
        """
        user = (await pu.Puppet.get_by_mxid(user_id)
                or await u.User.get_by_mxid(user_id, create=False))
        if not user:
            return msg
        if user.tg_username:
            return TelegramMessage(f"@{user.tg_username}").format(TelegramEntityType.MENTION)
        elif user.tgid:
            displayname = user.plain_displayname or msg.text
            msg = TelegramMessage(displayname)
            try:
                # get_input_entity() must be awaited: without it a coroutine object is
                # stored as the user ID and the lookup errors below are never raised here.
                input_entity = await self.client.get_input_entity(user.tgid)
            except (ValueError, TypeError) as e:
                log.trace(f"Dropping mention of {user.tgid}: {e}")
            else:
                msg = msg.format(TelegramEntityType.MENTION_NAME, user_id=input_entity)
        return msg

    async def url_to_fstring(self, msg: TelegramMessage, url: str) -> TelegramMessage:
        """Format a link as a bare URL when its text equals the URL, else as an inline URL."""
        if url == msg.text:
            return msg.format(self.e.URL)
        else:
            return msg.format(self.e.INLINE_URL, url=url)

    async def room_pill_to_fstring(
        self, msg: TelegramMessage, room_id: RoomID
    ) -> TelegramMessage | None:
        """Convert a Matrix room pill into an ``@username`` mention of the matching portal.

        Returns None when no portal with a username matches the alias.
        """
        username = po.Portal.get_username_from_mx_alias(room_id)
        portal = await po.Portal.find_by_username(username)
        if portal and portal.username:
            return TelegramMessage(f"@{portal.username}").format(TelegramEntityType.MENTION)
        return None

    async def header_to_fstring(self, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage:
        """Render a header as bold text prefixed with one ``#`` per header level."""
        children = await self.node_to_fstrings(node, ctx)
        # The second character of the tag name is read as the header level.
        length = int(node.tag[1])
        prefix = "#" * length + " "
        return TelegramMessage.join(children, "").prepend(prefix).format(TelegramEntityType.BOLD)

    async def blockquote_to_fstring(
        self, node: HTMLNode, ctx: RecursionContext
    ) -> TelegramMessage:
        """Render a blockquote by prefixing every line of its content with ``> ``."""
        msg = await self.tag_aware_parse_node(node, ctx)
        children = msg.trim().split("\n")
        children = [child.prepend("> ") for child in children]
        return TelegramMessage.join(children, "\n")
@@ -1,5 +1,5 @@
# mautrix-telegram - A Matrix-Telegram puppeting bridge
# Copyright (C) 2019 Tulir Asokan
# Copyright (C) 2021 Tulir Asokan
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
@@ -13,7 +13,9 @@
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from typing import Optional, Union, Any, List, Type, Dict
from __future__ import annotations
from typing import Any, Type
from enum import Enum
from telethon.tl.types import (MessageEntityMention as Mention, MessageEntityBotCommand as Command,
@@ -41,7 +43,7 @@ class TelegramEntityType(Enum):
INLINE_CODE = Code
BLOCKQUOTE = Blockquote
MENTION = Mention
MENTION_NAME = MentionName
MENTION_NAME = InputMentionName
COMMAND = Command
USER_MENTION = 1
@@ -52,15 +54,15 @@ class TelegramEntityType(Enum):
class TelegramEntity(SemiAbstractEntity):
internal: TypeMessageEntity
def __init__(
    self,
    type: TelegramEntityType | Type[TypeMessageEntity],
    offset: int,
    length: int,
    extra_info: dict[str, Any],
) -> None:
    """Create the wrapped Telegram entity.

    Args:
        type: A ``TelegramEntityType`` member or a Telegram entity class.
        offset: Passed to the entity constructor as ``offset``.
        length: Passed to the entity constructor as ``length``.
        extra_info: Extra keyword arguments for the entity constructor.

    Raises:
        ValueError: if ``type`` is an enum member whose value is an int rather
            than an entity class.
    """
    entity_cls = type
    if isinstance(entity_cls, TelegramEntityType):
        if isinstance(entity_cls.value, int):
            raise ValueError(f"Can't create Entity with non-Telegram EntityType {type}")
        entity_cls = entity_cls.value
    self.internal = entity_cls(offset=offset, length=length, **extra_info)
def copy(self) -> Optional['TelegramEntity']:
def copy(self) -> TelegramEntity:
extra_info = {}
if isinstance(self.internal, Pre):
extra_info["language"] = self.internal.language
@@ -95,5 +97,5 @@ class TelegramMessage(EntityString[TelegramEntity, TelegramEntityType]):
entity_class = TelegramEntity
@property
def telegram_entities(self) -> list[TypeMessageEntity]:
    """The wrapped Telegram entity (``internal``) of every entity, in order."""
    internals = []
    for wrapped in self.entities:
        internals.append(wrapped.internal)
    return internals