Implement Matrix -> Telegram formatted message bridging

This commit is contained in:
Tulir Asokan
2018-01-21 20:48:14 +02:00
parent f1d8312806
commit a8359441b0
6 changed files with 198 additions and 7 deletions
+2 -2
View File
@@ -54,9 +54,9 @@ does not do this automatically.
## Features & Roadmap ## Features & Roadmap
* Matrix → Telegram * Matrix → Telegram
* [x] Plaintext messages * [x] Plaintext messages
* [ ] Formatted messages * [x] Formatted messages
* [ ] Bot commands (!command -> /command) * [ ] Bot commands (!command -> /command)
* [ ] Mentions * [x] Mentions
* [ ] Locations * [ ] Locations
* [ ] Images * [ ] Images
* [ ] Files * [ ] Files
+2
View File
@@ -30,6 +30,7 @@ from .db import init as init_db
from .user import init as init_user from .user import init as init_user
from .portal import init as init_portal from .portal import init as init_portal
from .puppet import init as init_puppet from .puppet import init as init_puppet
from .formatter import init as init_formatter
log = logging.getLogger("mau") log = logging.getLogger("mau")
time_formatter = logging.Formatter("[%(asctime)s] [%(levelname)s@%(name)s] %(message)s") time_formatter = logging.Formatter("[%(asctime)s] [%(levelname)s@%(name)s] %(message)s")
@@ -75,6 +76,7 @@ context = (appserv, db, log, config)
with appserv.run(config["appservice.hostname"], config["appservice.port"]) as start: with appserv.run(config["appservice.hostname"], config["appservice.port"]) as start:
init_db(db_factory) init_db(db_factory)
init_formatter(context)
init_portal(context) init_portal(context)
init_puppet(context) init_puppet(context)
init_user(context) init_user(context)
+155
View File
@@ -15,11 +15,160 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
import re import re
from html import escape, unescape from html import escape, unescape
from html.parser import HTMLParser
from collections import deque
from telethon.tl.types import * from telethon.tl.types import *
from . import user as u, puppet as p from . import user as u, puppet as p
# Logger for this module; stays None until init() assigns a child logger.
log = None
class MatrixParser(HTMLParser):
    """Convert Matrix ``org.matrix.custom.html`` markup to Telegram text + entities.

    Feed the HTML with ``feed()`` and finish with ``close()``.  Afterwards
    ``text`` holds the plain message text and ``entities`` the completed
    ``MessageEntity*`` objects describing its formatting.

    NOTE(review): entity offsets and lengths are counted with ``len()``, i.e.
    in Unicode code points.  Telegram's API documents entity positions in
    UTF-16 code units, so text containing characters outside the BMP (most
    emoji) may produce shifted entities -- confirm and convert if needed.
    """

    # The dot is escaped so that only the real matrix.to host is matched.
    matrix_to_regex = re.compile(r"https://matrix\.to/#/(@.+)")

    # HTML void elements never receive an end tag when written as ``<br>``.
    # Tracking them on the open-tag stack would leave stale entries behind and
    # make later end tags pop the wrong element.
    _void_tags = frozenset((
        "area", "base", "br", "col", "embed", "hr", "img", "input",
        "link", "meta", "param", "source", "track", "wbr",
    ))

    def __init__(self):
        super().__init__()
        self.text = ""
        self.entities = []
        # Entities that have been opened but not yet closed, keyed by tag name.
        self._building_entities = {}
        self._list_counter = 0
        # Stack of currently open tags (innermost first) and a parallel stack
        # of per-tag metadata: the item counter for lists, the replacement
        # text for links, 0 otherwise.
        self._open_tags = deque()
        self._open_tags_meta = deque()
        self._previous_ended_line = True

    def handle_starttag(self, tag, attrs):
        """Push ``tag`` on the stack and start the Telegram entity it maps to."""
        if tag in self._void_tags:
            return
        self._open_tags.appendleft(tag)
        self._open_tags_meta.appendleft(0)

        attrs = dict(attrs)
        entity_type = None
        args = {}
        if tag in ("strong", "b"):
            entity_type = MessageEntityBold
        elif tag in ("em", "i"):
            entity_type = MessageEntityItalic
        elif tag == "code":
            pre = self._building_entities.get("pre")
            if pre is None:
                entity_type = MessageEntityCode
            else:
                # <pre><code class="language-x"> only annotates the enclosing
                # pre entity.  A valueless class attribute is parsed as None.
                css_class = attrs.get("class") or ""
                if css_class.startswith("language-"):
                    pre.language = css_class[len("language-"):]
        elif tag == "pre":
            entity_type = MessageEntityPre
            args["language"] = ""
        elif tag == "a":
            url = attrs.get("href")
            if url is None:
                # Missing or valueless href: nothing to link to.
                return
            mention = self.matrix_to_regex.search(url)
            if mention:
                mxid = mention.group(1)
                puppet_match = p.Puppet.mxid_regex.search(mxid)
                if puppet_match:
                    user = p.Puppet.get(puppet_match.group(1), create=False)
                else:
                    user = u.User.get_by_mxid(mxid, create=False)
                if not user:
                    return
                if user.username:
                    entity_type = MessageEntityMention
                    url = f"@{user.username}"
                else:
                    # NOTE(review): ``url`` is still the matrix.to link here,
                    # so handle_data() replaces the visible text with that
                    # link -- confirm whether the display name was intended.
                    entity_type = MessageEntityMentionName
                    args["user_id"] = user.tgid
            elif url.startswith("mailto:"):
                url = url[len("mailto:"):]
                entity_type = MessageEntityEmail
            else:
                # NOTE(review): get_starttag_text() returns the raw start tag
                # (e.g. '<a href="...">'), so this comparison looks like it can
                # never be true and every plain link becomes a TextUrl entity.
                if self.get_starttag_text() == url:
                    entity_type = MessageEntityUrl
                else:
                    entity_type = MessageEntityTextUrl
                    args["url"] = url
                    url = None
            # Remember the text that should replace the link's own content.
            self._open_tags_meta.popleft()
            self._open_tags_meta.appendleft(url)

        if entity_type and tag not in self._building_entities:
            self._building_entities[tag] = entity_type(offset=len(self.text), length=0, **args)

    def _list_depth(self):
        """Return how many ``<ol>``/``<ul>`` elements are currently open."""
        return sum(1 for tag in self._open_tags if tag in ("ol", "ul"))

    def handle_data(self, text):
        """Append ``text`` to the output and grow every entity being built."""
        # ``text`` is used as delivered: HTMLParser already decodes character
        # references (convert_charrefs defaults to True), and unescaping a
        # second time would turn a literal "&lt;" typed by the user into "<".
        previous_tag = self._open_tags[0] if self._open_tags else ""
        list_format_offset = 0
        if previous_tag == "a":
            url = self._open_tags_meta[0]
            if url:
                text = url
        elif len(self._open_tags) > 1 and self._previous_ended_line and previous_tag == "li":
            list_type = self._open_tags[1]
            indent = (self._list_depth() - 1) * 4 * " "
            text = text.strip("\n")
            if not text:
                return
            prefix = ""
            if list_type == "ul":
                prefix = f"{indent}* "
            elif list_type == "ol":
                n = self._open_tags_meta[1] + 1
                self._open_tags_meta[1] = n
                prefix = f"{indent}{n}. "
            text = prefix + text
            # Measure the real prefix so multi-digit counters are accounted for.
            list_format_offset = len(prefix)

        visible_length = len(text.strip("\n"))
        for entity in self._building_entities.values():
            entity.length += visible_length
            entity.offset += list_format_offset

        self._previous_ended_line = text.endswith("\n")
        self.text += text

    def handle_endtag(self, tag):
        """Pop the innermost open tag and finish the entity started for ``tag``."""
        if tag in self._void_tags:
            # Never pushed in handle_starttag(), so there is nothing to pop.
            return
        try:
            self._open_tags.popleft()
            self._open_tags_meta.popleft()
        except IndexError:
            # Stray end tag with nothing open; ignore it.
            pass
        if tag in ("ul", "ol") and self.text.endswith("\n"):
            self.text = self.text[:-1]
        entity = self._building_entities.pop(tag, None)
        if entity:
            self.entities.append(entity)
def matrix_to_telegram(html):
    """Convert Matrix HTML into ``(text, entities)`` for Telegram.

    Returns a tuple of the plain text and the list of entities collected by
    ``MatrixParser``.  If the conversion raises, the error is logged together
    with the offending HTML and ``None`` is returned, so callers must check
    the result before unpacking it.
    """
    try:
        parser = MatrixParser()
        parser.feed(html)
        # feed() may keep incomplete trailing data buffered; close() forces
        # the parser to process it so the end of the message is not lost.
        parser.close()
        return parser.text, parser.entities
    except Exception:
        log.exception("Failed to convert Matrix format:\nhtml=%s", html)
        return None
def telegram_to_matrix(text, entities):
    """Convert Telegram text and entities to Matrix HTML, logging any failure.

    Delegates to ``_telegram_to_matrix``.  If that raises, the exception is
    logged with the message text and entities and ``None`` is returned.
    """
    try:
        return _telegram_to_matrix(text, entities)
    except Exception:
        # Only ordinary errors are swallowed; SystemExit and KeyboardInterrupt
        # still propagate.
        log.exception("Failed to convert Telegram format:\n"
                      "message=%s\n"
                      "entities=%s",
                      text, entities)
        return None
def _telegram_to_matrix(text, entities):
if not entities: if not entities:
return text return text
html = [] html = []
@@ -86,3 +235,9 @@ def telegram_to_matrix(text, entities):
last_offset = entity.offset + (0 if skip_entity else entity.length) last_offset = entity.offset + (0 if skip_entity else entity.length)
html.append(text[last_offset:]) html.append(text[last_offset:])
return "".join(html) return "".join(html)
def init(context):
    """Create this module's logger from the shared bridge context.

    ``context`` must unpack into exactly four items; the third one is the
    parent logger, from which the "formatter" child logger is derived and
    stored in the module-level ``log``.
    """
    global log
    (_first, _second, base_logger, _fourth) = context
    log = base_logger.getChild("formatter")
+5 -1
View File
@@ -79,7 +79,11 @@ class Portal:
def handle_matrix_message(self, sender, message):
    """Relay a Matrix ``m.text`` message to this portal's Telegram peer.

    HTML-formatted messages are converted with
    ``formatter.matrix_to_telegram`` and sent together with the resulting
    entities.  When the message has no usable formatted body, or the
    conversion fails (the formatter returns ``None``), the plain ``body`` is
    sent instead.  Other message types are ignored.
    """
    msgtype = message["msgtype"]
    if msgtype != "m.text":
        return

    is_html = "format" in message and message["format"] == "org.matrix.custom.html"
    if is_html and "formatted_body" in message:
        converted = formatter.matrix_to_telegram(message["formatted_body"])
        if converted is not None:
            text, entities = converted
            sender.send_message(self.peer, text, entities=entities)
            return
        # Conversion failed and was already logged by the formatter; fall
        # through so the message is still delivered as plain text.

    sender.send_message(self.peer, message["body"])
def handle_telegram_message(self, sender, evt): def handle_telegram_message(self, sender, evt):
self.log.debug("Sending %s to %s by %d", evt.message, self.mxid, sender.id) self.log.debug("Sending %s to %s by %d", evt.message, self.mxid, sender.id)
+8 -3
View File
@@ -13,10 +13,8 @@
# #
# You should have received a copy of the GNU General Public License # You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
from telethon import TelegramClient import re
from telethon.tl.types import User as UserEntity, Chat as ChatEntity, Channel as ChannelEntity
from .db import Puppet as DBPuppet from .db import Puppet as DBPuppet
from . import portal as p
config = None config = None
@@ -36,6 +34,10 @@ class Puppet:
self.cache[id] = self self.cache[id] = self
@property
def tgid(self):
    """Return this object's ``id`` under the attribute name ``tgid``."""
    return self.id
def to_db(self): def to_db(self):
return self.db.merge( return self.db.merge(
DBPuppet(id=self.id, username=self.username, displayname=self.displayname)) DBPuppet(id=self.id, username=self.username, displayname=self.displayname))
@@ -109,3 +111,6 @@ def init(context):
global config global config
Puppet.az, Puppet.db, log, config = context Puppet.az, Puppet.db, log, config = context
Puppet.log = log.getChild("puppet") Puppet.log = log.getChild("puppet")
localpart = config.get("bridge.alias_template", "telegram_{}").format("(.+)")
hs = config["homeserver"]["domain"]
Puppet.mxid_regex = re.compile(f"@{localpart}:{hs}")
+26 -1
View File
@@ -16,7 +16,8 @@
import traceback import traceback
from telethon import TelegramClient from telethon import TelegramClient
from telethon.tl.types import User as UserEntity, Chat as ChatEntity, Channel as ChannelEntity, \ from telethon.tl.types import User as UserEntity, Chat as ChatEntity, Channel as ChannelEntity, \
UpdateShortMessage, UpdateShortChatMessage UpdateShortMessage, UpdateShortChatMessage, Message, UpdateShortSentMessage
from telethon.tl.functions.messages import SendMessageRequest
from .db import User as DBUser from .db import User as DBUser
from . import portal as po, puppet as pu from . import portal as po, puppet as pu
@@ -89,6 +90,30 @@ class User:
self.client = None self.client = None
self.connected = False self.connected = False
def send_message(self, entity, message, reply_to=None, entities=None, link_preview=True):
    """Send ``message`` to ``entity`` through this user's client.

    The request is built directly as a ``SendMessageRequest`` so that
    ``entities`` can be passed through as given.  ``link_preview=False`` sets
    ``no_webpage`` on the request.

    Returns a ``Message``: built locally from the request and the update when
    the client answers with ``UpdateShortSentMessage``, otherwise whatever the
    client's ``_get_response_message`` extracts from the result.
    """
    peer = self.client.get_input_entity(entity)
    reply_id = self.client._get_reply_to(reply_to)
    request = SendMessageRequest(
        peer=peer,
        message=message,
        entities=entities,
        no_webpage=not link_preview,
        reply_to_msg_id=reply_id,
    )
    response = self.client(request)

    if not isinstance(response, UpdateShortSentMessage):
        return self.client._get_response_message(request, response)

    # The short update carries no full message, so assemble one from the
    # fields of the update plus what was sent.
    return Message(
        id=response.id,
        to_id=peer,
        message=message,
        date=response.date,
        out=response.out,
        media=response.media,
        entities=response.entities,
    )
def sync_dialogs(self): def sync_dialogs(self):
dialogs = self.client.get_dialogs(limit=30) dialogs = self.client.get_dialogs(limit=30)
for dialog in dialogs: for dialog in dialogs: