Add basic support for bridging custom emojis from Telegram
This commit is contained in:
@@ -6,6 +6,7 @@ Minimum Conduit version remains at 0.4.0.
|
|||||||
|
|
||||||
### Added
|
### Added
|
||||||
* Added provisioning API for resolving Telegram identifiers (like usernames).
|
* Added provisioning API for resolving Telegram identifiers (like usernames).
|
||||||
|
* Added basic bridging of Telegram custom emojis to Matrix.
|
||||||
* Added option to not bridge chats with lots of members.
|
* Added option to not bridge chats with lots of members.
|
||||||
* Added option to include captions in the same message as the media to
|
* Added option to include captions in the same message as the media to
|
||||||
implement [MSC2530]. Sending captions the same way is also supported and
|
implement [MSC2530]. Sending captions the same way is also supported and
|
||||||
|
|||||||
@@ -24,6 +24,7 @@
|
|||||||
* Telegram → Matrix
|
* Telegram → Matrix
|
||||||
* [x] Message content (text, formatting, files, etc..)
|
* [x] Message content (text, formatting, files, etc..)
|
||||||
* [ ] Advanced message content/media
|
* [ ] Advanced message content/media
|
||||||
|
* [x] Custom emojis
|
||||||
* [x] Polls
|
* [x] Polls
|
||||||
* [x] Games
|
* [x] Games
|
||||||
* [ ] Buttons
|
* [ ] Buttons
|
||||||
|
|||||||
@@ -17,10 +17,11 @@ from __future__ import annotations
|
|||||||
|
|
||||||
from typing import TYPE_CHECKING, ClassVar
|
from typing import TYPE_CHECKING, ClassVar
|
||||||
|
|
||||||
|
from asyncpg import Record
|
||||||
from attr import dataclass
|
from attr import dataclass
|
||||||
|
|
||||||
from mautrix.types import ContentURI, EncryptedFile
|
from mautrix.types import ContentURI, EncryptedFile
|
||||||
from mautrix.util.async_db import Database
|
from mautrix.util.async_db import Database, Scheme
|
||||||
|
|
||||||
fake_db = Database.create("") if TYPE_CHECKING else None
|
fake_db = Database.create("") if TYPE_CHECKING else None
|
||||||
|
|
||||||
@@ -40,28 +41,47 @@ class TelegramFile:
|
|||||||
decryption_info: EncryptedFile | None
|
decryption_info: EncryptedFile | None
|
||||||
thumbnail: TelegramFile | None = None
|
thumbnail: TelegramFile | None = None
|
||||||
|
|
||||||
|
columns: ClassVar[str] = (
|
||||||
|
"id, mxc, mime_type, was_converted, timestamp, size, width, height, thumbnail, "
|
||||||
|
"decryption_info"
|
||||||
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
async def get(cls, loc_id: str, *, _thumbnail: bool = False) -> TelegramFile | None:
|
def _from_row(cls, row: Record | None) -> TelegramFile | None:
|
||||||
q = (
|
|
||||||
"SELECT id, mxc, mime_type, was_converted, timestamp, size, width, height, thumbnail,"
|
|
||||||
" decryption_info "
|
|
||||||
"FROM telegram_file WHERE id=$1"
|
|
||||||
)
|
|
||||||
row = await cls.db.fetchrow(q, loc_id)
|
|
||||||
if row is None:
|
if row is None:
|
||||||
return None
|
return None
|
||||||
data = {**row}
|
data = {**row}
|
||||||
thumbnail_id = data.pop("thumbnail", None)
|
data.pop("thumbnail", None)
|
||||||
if _thumbnail:
|
|
||||||
# Don't allow more than one level of recursion
|
|
||||||
thumbnail_id = None
|
|
||||||
decryption_info = data.pop("decryption_info", None)
|
decryption_info = data.pop("decryption_info", None)
|
||||||
return cls(
|
return cls(
|
||||||
**data,
|
**data,
|
||||||
thumbnail=(await cls.get(thumbnail_id, _thumbnail=True)) if thumbnail_id else None,
|
thumbnail=None,
|
||||||
decryption_info=EncryptedFile.parse_json(decryption_info) if decryption_info else None,
|
decryption_info=EncryptedFile.parse_json(decryption_info) if decryption_info else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@classmethod
async def get_many(cls, loc_ids: list[str]) -> list[TelegramFile]:
    """
    Fetch every stored file whose ID is in ``loc_ids`` with a single query.

    IDs without a matching row are simply absent from the result. The returned
    objects are built by ``_from_row``, which always leaves ``thumbnail`` unset.

    Args:
        loc_ids: The file IDs to look up.

    Returns:
        The files that were found, in whatever order the database returned them.
    """
    if not loc_ids:
        # Nothing to look up: skip the round trip and avoid generating an
        # empty ``IN ()`` list on the placeholder-based branch below.
        return []
    if cls.db.scheme in (Scheme.POSTGRES, Scheme.COCKROACH):
        # All IDs are passed as one array parameter.
        q = f"SELECT {cls.columns} FROM telegram_file WHERE id=ANY($1)"
        rows = await cls.db.fetch(q, loc_ids)
    else:
        # Other schemes get one positional placeholder per ID instead.
        tgid_placeholders = ",".join("?" * len(loc_ids))
        q = f"SELECT {cls.columns} FROM telegram_file WHERE id IN ({tgid_placeholders})"
        rows = await cls.db.fetch(q, *loc_ids)
    return [cls._from_row(row) for row in rows]
|
||||||
|
|
||||||
|
@classmethod
async def get(cls, loc_id: str, *, _thumbnail: bool = False) -> TelegramFile | None:
    """
    Load a single file by ID, together with its thumbnail if it has one.

    Args:
        loc_id: The ID of the file row to fetch.
        _thumbnail: Internal flag set when this call is itself loading a
            thumbnail; it stops the lookup from following a further thumbnail
            reference, so recursion is at most one level deep.

    Returns:
        The file, or ``None`` when no row with that ID exists.
    """
    row = await cls.db.fetchrow(f"SELECT {cls.columns} FROM telegram_file WHERE id=$1", loc_id)
    file = cls._from_row(row)
    if file is None:
        return None
    if _thumbnail:
        # Already loading a thumbnail: do not follow its own thumbnail column.
        return file
    thumbnail_id = row.get("thumbnail", None)
    if thumbnail_id:
        file.thumbnail = await cls.get(thumbnail_id, _thumbnail=True)
    return file
|
||||||
|
|
||||||
async def insert(self) -> None:
|
async def insert(self) -> None:
|
||||||
q = (
|
q = (
|
||||||
"INSERT INTO telegram_file (id, mxc, mime_type, was_converted, size, width, height, "
|
"INSERT INTO telegram_file (id, mxc, mime_type, was_converted, size, width, height, "
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ import logging
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from telethon.errors import RPCError
|
from telethon.errors import RPCError
|
||||||
from telethon.helpers import add_surrogate, del_surrogate, within_surrogate
|
from telethon.helpers import add_surrogate, del_surrogate
|
||||||
from telethon.tl.custom import Message
|
from telethon.tl.custom import Message
|
||||||
from telethon.tl.types import (
|
from telethon.tl.types import (
|
||||||
MessageEntityBlockquote,
|
MessageEntityBlockquote,
|
||||||
@@ -52,8 +52,9 @@ from telethon.tl.types import (
|
|||||||
from mautrix.types import Format, MessageType, TextMessageEventContent
|
from mautrix.types import Format, MessageType, TextMessageEventContent
|
||||||
|
|
||||||
from .. import abstract_user as au, portal as po, puppet as pu, user as u
|
from .. import abstract_user as au, portal as po, puppet as pu, user as u
|
||||||
from ..db import Message as DBMessage
|
from ..db import Message as DBMessage, TelegramFile as DBTelegramFile
|
||||||
from ..types import TelegramID
|
from ..types import TelegramID
|
||||||
|
from ..util.file_transfer import transfer_custom_emojis_to_matrix
|
||||||
|
|
||||||
log: logging.Logger = logging.getLogger("mau.fmt.tg")
|
log: logging.Logger = logging.getLogger("mau.fmt.tg")
|
||||||
|
|
||||||
@@ -125,6 +126,27 @@ async def _add_forward_header(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ReuploadedCustomEmoji(MessageEntityCustomEmoji):
    """
    A custom emoji entity paired with a ``DBTelegramFile`` record.

    The offset, length and document ID are copied from an existing
    ``MessageEntityCustomEmoji``; the file is stored alongside them so the
    formatter can read ``file.mxc`` when rendering the entity.
    """

    # File record associated with this emoji's document.
    file: DBTelegramFile

    def __init__(self, parent: MessageEntityCustomEmoji, file: DBTelegramFile) -> None:
        super().__init__(
            offset=parent.offset,
            length=parent.length,
            document_id=parent.document_id,
        )
        self.file = file
|
||||||
|
|
||||||
|
|
||||||
|
async def _convert_custom_emoji(
    source: au.AbstractUser, entities: list[TypeMessageEntity]
) -> None:
    """
    Swap custom emoji entities for versions that carry their transferred file.

    ``entities`` is modified in place: every ``MessageEntityCustomEmoji`` for
    which a file is available is replaced by a ``ReuploadedCustomEmoji`` at the
    same index. Entities whose document has no file in the returned map are
    left untouched instead of raising.

    Args:
        source: The user whose client is used to fetch emojis that are not
            stored yet.
        entities: The message entities to process.
    """
    emoji_ids = [
        entity.document_id for entity in entities if isinstance(entity, MessageEntityCustomEmoji)
    ]
    if not emoji_ids:
        # No custom emojis in this message, so there is nothing to look up.
        return
    custom_emojis = await transfer_custom_emojis_to_matrix(source, emoji_ids)
    for i, entity in enumerate(entities):
        if not isinstance(entity, MessageEntityCustomEmoji):
            continue
        # The map only contains IDs for which a file exists; a missing or empty
        # entry must not abort the conversion of the whole message.
        file = custom_emojis.get(entity.document_id)
        if file is not None:
            entities[i] = ReuploadedCustomEmoji(entity, file)
|
||||||
|
|
||||||
|
|
||||||
async def telegram_to_matrix(
|
async def telegram_to_matrix(
|
||||||
evt: Message | SponsoredMessage,
|
evt: Message | SponsoredMessage,
|
||||||
source: au.AbstractUser,
|
source: au.AbstractUser,
|
||||||
@@ -138,6 +160,7 @@ async def telegram_to_matrix(
|
|||||||
)
|
)
|
||||||
entities = override_entities or evt.entities
|
entities = override_entities or evt.entities
|
||||||
if entities:
|
if entities:
|
||||||
|
await _convert_custom_emoji(source, entities)
|
||||||
content.format = Format.HTML
|
content.format = Format.HTML
|
||||||
html = await _telegram_entities_to_matrix_catch(add_surrogate(content.body), entities)
|
html = await _telegram_entities_to_matrix_catch(add_surrogate(content.body), entities)
|
||||||
content.formatted_body = del_surrogate(html)
|
content.formatted_body = del_surrogate(html)
|
||||||
@@ -166,9 +189,20 @@ async def _telegram_entities_to_matrix_catch(text: str, entities: list[TypeMessa
|
|||||||
return "[failed conversion in _telegram_entities_to_matrix]"
|
return "[failed conversion in _telegram_entities_to_matrix]"
|
||||||
|
|
||||||
|
|
||||||
|
def within_surrogate(text, index):
    """
    `True` if ``index`` is within a surrogate (before and after it, not at!).

    ``text`` is expected to hold UTF-16 surrogate pairs as two separate
    characters. ``index`` splits a pair when the character just before it is a
    high surrogate and the character at it is a low surrogate, which includes
    index 1 of a text that begins with a pair.
    """
    return (
        0 < index < len(text)  # in bounds, with a character on both sides
        and "\ud800" <= text[index - 1] <= "\udbff"  # previous is high surrogate
        and "\udc00" <= text[index] <= "\udfff"  # current is low surrogate
    )
|
||||||
|
|
||||||
|
|
||||||
async def _telegram_entities_to_matrix(
|
async def _telegram_entities_to_matrix(
|
||||||
text: str,
|
text: str,
|
||||||
entities: list[TypeMessageEntity],
|
entities: list[TypeMessageEntity | ReuploadedCustomEmoji],
|
||||||
offset: int = 0,
|
offset: int = 0,
|
||||||
length: int = None,
|
length: int = None,
|
||||||
in_codeblock: bool = False,
|
in_codeblock: bool = False,
|
||||||
@@ -197,10 +231,9 @@ async def _telegram_entities_to_matrix(
|
|||||||
elif relative_offset < last_offset:
|
elif relative_offset < last_offset:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# TODO this breaks when there are lots of emojis in a row (e.g. custom emojis)
|
while within_surrogate(text, relative_offset):
|
||||||
# while within_surrogate(text, relative_offset, length=length):
|
relative_offset += 1
|
||||||
# relative_offset += 1
|
while within_surrogate(text, relative_offset + entity.length):
|
||||||
while within_surrogate(text, relative_offset + entity.length, length=length):
|
|
||||||
entity.length += 1
|
entity.length += 1
|
||||||
|
|
||||||
skip_entity = False
|
skip_entity = False
|
||||||
@@ -244,8 +277,12 @@ async def _telegram_entities_to_matrix(
|
|||||||
html, entity_text, entity.url if entity_type == MessageEntityTextUrl else None
|
html, entity_text, entity.url if entity_type == MessageEntityTextUrl else None
|
||||||
)
|
)
|
||||||
elif entity_type == MessageEntityCustomEmoji:
|
elif entity_type == MessageEntityCustomEmoji:
|
||||||
# TODO support properly
|
|
||||||
html.append(entity_text)
|
html.append(entity_text)
|
||||||
|
elif entity_type == ReuploadedCustomEmoji:
|
||||||
|
html.append(
|
||||||
|
f'<img data-mx-emoticon src="{escape(entity.file.mxc)}" height="32" '
|
||||||
|
f'alt="{entity_text}" title="{entity_text}"/>'
|
||||||
|
)
|
||||||
elif entity_type in (
|
elif entity_type in (
|
||||||
MessageEntityBotCommand,
|
MessageEntityBotCommand,
|
||||||
MessageEntityHashtag,
|
MessageEntityHashtag,
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ from telethon.errors import (
|
|||||||
LocationInvalidError,
|
LocationInvalidError,
|
||||||
SecurityError,
|
SecurityError,
|
||||||
)
|
)
|
||||||
|
from telethon.tl.functions.messages import GetCustomEmojiDocumentsRequest
|
||||||
from telethon.tl.types import (
|
from telethon.tl.types import (
|
||||||
Document,
|
Document,
|
||||||
InputDocumentFileLocation,
|
InputDocumentFileLocation,
|
||||||
@@ -45,6 +46,7 @@ import magic
|
|||||||
|
|
||||||
from mautrix.appservice import IntentAPI
|
from mautrix.appservice import IntentAPI
|
||||||
|
|
||||||
|
from .. import abstract_user as au
|
||||||
from ..db import TelegramFile as DBTelegramFile
|
from ..db import TelegramFile as DBTelegramFile
|
||||||
from ..tgclient import MautrixTelegramClient
|
from ..tgclient import MautrixTelegramClient
|
||||||
from ..util import sane_mimetypes
|
from ..util import sane_mimetypes
|
||||||
@@ -212,6 +214,37 @@ transfer_locks: dict[str, asyncio.Lock] = {}
|
|||||||
TypeThumbnail = Optional[Union[TypeLocation, TypePhotoSize]]
|
TypeThumbnail = Optional[Union[TypeLocation, TypePhotoSize]]
|
||||||
|
|
||||||
|
|
||||||
|
async def transfer_custom_emojis_to_matrix(
    source: au.AbstractUser, emoji_ids: list[int]
) -> dict[int, DBTelegramFile]:
    """
    Resolve custom emoji document IDs to files, transferring the missing ones.

    Files already stored in the database are reused. For the remaining IDs the
    documents are requested through ``source.client`` and each one is passed to
    ``transfer_file_to_matrix``; those transfers run concurrently.

    Args:
        source: The user whose client, bridge intent and config are used.
        emoji_ids: Custom emoji document IDs; duplicates are ignored.

    Returns:
        A mapping from document ID to file. IDs for which no document came back
        are not present in the mapping.
    """
    wanted_ids = set(emoji_ids)
    stored_files = await DBTelegramFile.get_many([str(emoji_id) for emoji_id in wanted_ids])
    file_map = {int(file.id): file for file in stored_files}
    not_existing_ids = list(wanted_ids - file_map.keys())
    if not not_existing_ids:
        return file_map

    log.debug(f"Transferring custom emojis through {source.mxid}: {not_existing_ids}")
    documents: list[Document] = await source.client(
        GetCustomEmojiDocumentsRequest(document_id=not_existing_ids)
    )

    async def transfer(document: Document) -> None:
        # NOTE(review): whatever transfer_file_to_matrix returns is stored
        # as-is; confirm it cannot return None for a failed transfer.
        file_map[document.id] = await transfer_file_to_matrix(
            source.client,
            source.bridge.az.intent,
            document,
            is_sticker=True,
            tgs_convert={"target": "png", "args": {"width": 256, "height": 256}},
            filename=f"emoji-{document.id}",
            # Emojis are used as inline images and can't be encrypted
            encrypt=False,
            async_upload=source.config["homeserver.async_media"],
        )

    pending = [transfer(doc) for doc in documents]
    await asyncio.gather(*pending)
    return file_map
|
||||||
|
|
||||||
|
|
||||||
async def transfer_file_to_matrix(
|
async def transfer_file_to_matrix(
|
||||||
client: MautrixTelegramClient,
|
client: MautrixTelegramClient,
|
||||||
intent: IntentAPI,
|
intent: IntentAPI,
|
||||||
|
|||||||
Reference in New Issue
Block a user