Add refactoring and formatted text bridging

This commit is contained in:
Tulir Asokan
2017-11-27 21:35:32 +02:00
parent a6bd561c7c
commit 0a09816003
6 changed files with 301 additions and 32 deletions
+2 -1
View File
@@ -169,6 +169,7 @@
"no-case-declarations": "off",
"no-template-curly-in-string": "off",
"no-await-in-loop": "off",
"no-restricted-globals": "off"
"no-restricted-globals": "off",
"no-fallthrough": "off"
}
}
+2
View File
@@ -28,6 +28,7 @@ A Telegram chat will be created once the bridge is stable enough.
## Features & Roadmap
* Matrix → Telegram
* [x] Plaintext messages
* [x] Formatted messages
* [ ] Images
* [ ] Files
* [ ] Message redactions
@@ -36,6 +37,7 @@ A Telegram chat will be created once the bridge is stable enough.
* [ ] Power level
* Telegram → Matrix
* [x] Plaintext messages
* [x] Formatted messages
* [x] Images
* [ ] Stickers (somewhat works through document upload, no preview though)
* [x] Audio messages
+212
View File
@@ -0,0 +1,212 @@
// mautrix-telegram - A Matrix-Telegram puppeting bridge
// Copyright (C) 2017 Tulir Asokan
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
/**
* Utility functions to convert between Telegram and Matrix (HTML) formatting.
*
* WARNING: This module contains headache-causing regular expressions and other duct tape.
*
* @module formatter
*/
// Adds `insert` to every string: returns a copy of the string with `str`
// spliced in at index `at`. `at` is interpreted the way String.prototype.slice
// interprets it, so negative values count from the end of the string.
String.prototype.insert = function(at, str) {
	const head = this.slice(0, at)
	const tail = this.slice(at)
	return head + str + tail
}
/**
 * Queue an attribute-less HTML tag pair that wraps the range covered by an entity.
 *
 * Two `[index, markup, sortKey]` triples are appended to `tags`: the opening tag at
 * the entity's offset with the negated priority, and the closing tag at the end of
 * the entity with the priority itself.
 *
 * @param {Array}  tags     The list of tag triples to append to.
 * @param {Object} entity   An object with numeric `offset` and `length` fields.
 * @param {string} tag      The HTML tag name.
 * @param {number} priority The sort priority of the tag pair.
 */
function addSimpleTag(tags, entity, tag, priority = 0) {
	const { offset, length } = entity
	const opening = [offset, `<${tag}>`, -priority]
	const closing = [offset + length, `</${tag}>`, priority]
	tags.push(opening, closing)
}
/**
 * Queue an HTML tag pair with attributes that wraps the range covered by an entity.
 *
 * Works like the attribute-less variant, except that `attrs` is placed verbatim
 * after the tag name in the opening tag.
 *
 * @param {Array}  tags     The list of `[index, markup, sortKey]` triples to append to.
 * @param {Object} entity   An object with numeric `offset` and `length` fields.
 * @param {string} tag      The HTML tag name.
 * @param {string} attrs    The attribute string for the opening tag.
 * @param {number} priority The sort priority of the tag pair.
 */
function addTag(tags, entity, tag, attrs, priority = 0) {
	const { offset, length } = entity
	const opening = [offset, `<${tag} ${attrs}>`, -priority]
	const closing = [offset + length, `</${tag}>`, priority]
	tags.push(opening, closing)
}
// Characters that must not appear verbatim in HTML text or double-quoted attribute values.
const HTML_ESCAPES = { "&": "&amp;", "<": "&lt;", ">": "&gt;", "\"": "&quot;" }

/**
 * Escape a string so it can be embedded in HTML text or in a double-quoted attribute.
 *
 * @param   {string} text The raw text.
 * @returns {string}      The text with `&`, `<`, `>` and `"` replaced by character references.
 */
function escapeHTML(text) {
	return String(text).replace(/[&<>"]/g, char => HTML_ESCAPES[char])
}

/**
 * Convert a Telegram entity-formatted message to a Matrix HTML-formatted message.
 *
 * Every supported entity is turned into an opening and a closing tag, each stored as
 * `[index, markup, sortKey]`. The tags are sorted from the end of the message towards
 * the start and the output is assembled back to front, so the indexes always refer to
 * the original message. The plain text between two tags is HTML-escaped while it is
 * copied; attribute values taken from the message or the entity are escaped as well.
 *
 * The sort key decides the order of tags that share an index. Opening tags carry the
 * negated priority and closing tags the priority itself, and a tag that sorts earlier
 * ends up further right in the output. Closing tags therefore land before opening tags
 * at the same index (`<strong>bold</strong><em>italic</em>` rather than
 * `<strong>bold<em></strong>italic</em>`), and of two entities covering the same range
 * the one with the higher priority becomes the inner element. The priority counter
 * decreases with every entity so no two entities share a priority.
 *
 * Entity types that are not listed in the switch are ignored.
 *
 * @param   {string} message  The plaintext message.
 * @param   {Array}  entities The Telegram formatting entities. A missing value is treated as an empty list.
 * @returns {string}          The message as HTML.
 */
function telegramToMatrix(message, entities) {
	const tags = []
	let pc = 9001
	for (const entity of entities || []) {
		// The part of the message covered by the entity, escaped for use inside an attribute.
		const covered = escapeHTML(message.slice(entity.offset, entity.offset + entity.length))
		switch (entity._) {
		case "messageEntityBold":
			addSimpleTag(tags, entity, "strong", --pc)
			break
		case "messageEntityItalic":
			addSimpleTag(tags, entity, "em", --pc)
			break
		case "messageEntityCode":
			addSimpleTag(tags, entity, "code", --pc)
			break
		case "messageEntityPre":
			pc--
			addSimpleTag(tags, entity, "pre", pc)
			// <code> gets the higher priority so it is nested inside <pre>.
			if (entity.language) {
				addTag(tags, entity, "code", `class="language-${escapeHTML(entity.language)}"`, pc + 1)
			} else {
				// No language: emit a bare <code> instead of class="language-".
				addSimpleTag(tags, entity, "code", pc + 1)
			}
			break
		case "messageEntityHashtag":
		case "messageEntityBotCommand":
			// TODO bridge bot commands differently?
			addTag(tags, entity, "font", "color=\"blue\"", --pc)
			break
		case "messageEntityMention":
			// TODO bridge mentions properly?
			addTag(tags, entity, "font", "color=\"red\"", --pc)
			break
		case "messageEntityEmail":
			addTag(tags, entity, "a", `href="mailto:${covered}"`, --pc)
			break
		case "messageEntityUrl":
			addTag(tags, entity, "a", `href="${covered}"`, --pc)
			break
		case "messageEntityTextUrl":
			addTag(tags, entity, "a", `href="${escapeHTML(entity.url)}"`, --pc)
			break
		}
	}

	// Last index first; ties are resolved by the sort key described above.
	tags.sort(([aIndex, , aPriority], [bIndex, , bPriority]) => bIndex - aIndex || aPriority - bPriority)

	// Assemble the HTML back to front: each step prepends one tag and the escaped
	// text between that tag and the previously handled one.
	let html = ""
	let end = message.length
	for (const [index, markup] of tags) {
		// Clamp so that entities pointing outside the message cannot duplicate or reorder text.
		const at = Math.max(0, Math.min(index, end))
		html = markup + escapeHTML(message.slice(at, end)) + html
		end = at
	}
	return escapeHTML(message.slice(0, end)) + html
}
// Matches <br> in any spelling together with the newline that may directly follow it.
const LINEBREAK = /<br\s*\/?>\n?/gi
// HTML character references that are decoded back to plain characters.
const NAMED_ENTITIES = { amp: "&", lt: "<", gt: ">", quot: "\"", apos: "'", nbsp: "\u00a0" }
const CHARACTER_REFERENCE = /&(?:#x([0-9a-f]+)|#([0-9]+)|(amp|lt|gt|quot|apos|nbsp));/gi

/**
 * Decode the HTML character references in a piece of text.
 *
 * Handles decimal and hexadecimal numeric references and the names listed in
 * NAMED_ENTITIES; anything else is left untouched.
 *
 * @param   {string} text The HTML text.
 * @returns {string}      The decoded text.
 */
function decodeEntities(text) {
	return text.replace(CHARACTER_REFERENCE, (match, hex, dec, name) => {
		if (name) {
			return NAMED_ENTITIES[name.toLowerCase()]
		}
		const codePoint = parseInt(hex || dec, hex ? 16 : 10)
		// String.fromCodePoint throws for values above U+10FFFF; keep those verbatim.
		return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match
	})
}

/**
 * Read one attribute from the attribute part of an opening tag.
 *
 * @param   {string} attrs The text between the tag name and the closing `>`.
 * @param   {string} name  The attribute name (a fixed word, it is placed into a regular expression).
 * @returns {string|undefined} The decoded value, or undefined if the attribute is absent.
 */
function getAttribute(attrs, name) {
	const pattern = new RegExp(`(?:^|\\s)${name}\\s*=\\s*(?:"([^"]*)"|'([^']*)'|([^\\s"']+))`, "i")
	const match = pattern.exec(attrs)
	if (!match) {
		return undefined
	}
	// Exactly one of the three groups (double-quoted, single-quoted, unquoted) is set.
	return decodeEntities(match.slice(1).find(value => value !== undefined))
}

/**
 * Convert a Matrix HTML-formatted message to a Telegram entity-formatted message.
 *
 * The HTML is read in a single pass. Text is appended to the output (with character
 * references decoded) and every entity offset is taken from the length of the output
 * at the moment its tag opens, so offsets and lengths always refer to the returned
 * message rather than to the HTML source.
 *
 * Formatting that is converted back to text:
 *   - `<br>` becomes a newline, `</p>` appends a newline
 *   - `<h1>`..`<h6>` are prefixed with that many `#` and a space
 *   - `<li>` is prefixed with `- `, or with `1. `, `2. `, ... inside `<ol>`
 *
 * Formatting that is converted to Telegram entities:
 *   - `<strong>`/`<b>` -> messageEntityBold, `<em>`/`<i>` -> messageEntityItalic
 *   - `<pre>` -> messageEntityPre; a `<code class="language-x">` directly inside sets the language
 *   - any other `<code>` -> messageEntityCode
 *   - `<a href="mailto:...">` -> messageEntityEmail, with the link text replaced by the address
 *   - other `<a href>` -> messageEntityUrl if the text equals the URL, else messageEntityTextUrl
 *
 * All other tags are dropped while their text content is kept. Entities that would
 * cover no text are omitted.
 *
 * @param {string} message The HTML-formatted message.
 * @returns {{message: string, entities: Array}} The Telegram entity-formatted message.
 */
function matrixToTelegram(message) {
	const entities = []
	// Entity-producing tags that are currently open, innermost last.
	const open = []
	// Enclosing <ul>/<ol> elements, innermost last.
	const lists = []
	let text = ""

	// Turn a closed (or abandoned) frame into an entity.
	const finish = frame => {
		if (frame.href !== undefined) {
			if (frame.href.startsWith("mailto:")) {
				const address = frame.href.slice("mailto:".length)
				// The link text is replaced by the address, so entities recorded
				// inside the link would no longer line up with the text: drop them.
				entities.length = frame.mark
				text = text.slice(0, frame.offset) + address
				frame.entity = { _: "messageEntityEmail" }
			} else if (frame.href === text.slice(frame.offset)) {
				frame.entity = { _: "messageEntityUrl" }
			} else {
				frame.entity = { _: "messageEntityTextUrl", url: frame.href }
			}
		}
		const length = text.length - frame.offset
		if (frame.entity && length > 0) {
			entities.push(Object.assign({ _: frame.entity._, offset: frame.offset, length }, frame.entity))
		}
	}

	const openTag = (name, attrs) => {
		const frame = { name, offset: text.length, entity: null, href: undefined, mark: entities.length }
		switch (name) {
		case "strong":
		case "b":
			frame.entity = { _: "messageEntityBold" }
			break
		case "em":
		case "i":
			frame.entity = { _: "messageEntityItalic" }
			break
		case "pre":
			frame.entity = { _: "messageEntityPre", language: "" }
			break
		case "code": {
			const parent = open[open.length - 1]
			if (parent && parent.name === "pre" && parent.offset === text.length) {
				// <pre><code>: the <pre> frame owns the entity, <code> only supplies the language.
				const language = /(?:^|\s)language-(\S+)/.exec(getAttribute(attrs, "class") || "")
				if (language) {
					parent.entity.language = language[1]
				}
			} else {
				frame.entity = { _: "messageEntityCode" }
			}
			break
		}
		case "a":
			// An empty href is treated like a missing one: the text is kept, no entity is made.
			frame.href = getAttribute(attrs, "href") || undefined
			break
		case "ul":
		case "ol":
			lists.push({ ordered: name === "ol", count: 0 })
			return
		case "li": {
			const list = lists[lists.length - 1]
			text += list && list.ordered ? `${++list.count}. ` : "- "
			return
		}
		default:
			if (/^h[1-6]$/.test(name)) {
				text += `${"#".repeat(Number(name.charAt(1)))} `
			}
			return
		}
		open.push(frame)
	}

	const closeTag = name => {
		if (name === "p") {
			text += "\n"
			return
		}
		if (name === "ul" || name === "ol") {
			lists.pop()
			return
		}
		const index = open.map(frame => frame.name).lastIndexOf(name)
		if (index === -1) {
			return
		}
		// Closing an outer tag implicitly closes everything still open inside it.
		while (open.length > index) {
			finish(open.pop())
		}
	}

	// A tag (1: slash, 2: name, 3: attributes), a run of text (4), or a stray "<".
	const tokens = /<(\/?)([a-z][a-z0-9]*)([^<>]*)>|([^<]+)|</gi
	const html = message.replace(LINEBREAK, "\n")
	for (let token = tokens.exec(html); token !== null; token = tokens.exec(html)) {
		const [raw, slash, tagName, attrs, chunk] = token
		if (tagName === undefined) {
			text += decodeEntities(chunk !== undefined ? chunk : raw)
		} else if (slash) {
			closeTag(tagName.toLowerCase())
		} else {
			openTag(tagName.toLowerCase(), attrs)
		}
	}
	// Tags that were never closed extend to the end of the message.
	while (open.length > 0) {
		finish(open.pop())
	}

	entities.sort((a, b) => a.offset - b.offset || b.length - a.length)
	return { message: text, entities }
}
module.exports = { telegramToMatrix, matrixToTelegram }
+51 -16
View File
@@ -14,6 +14,7 @@
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
const TelegramPeer = require("./telegram-peer")
const formatter = require("./formatter")
/**
* Portal represents a portal from a Matrix room to a Telegram chat.
@@ -65,9 +66,9 @@ class Portal {
}
async copyPhotoSize(telegramPOV, sender, photo) {
async copyTelegramPhoto(telegramPOV, sender, photo) {
const size = photo.sizes.slice(-1)[0]
const uploaded = await this.copyFile(telegramPOV, sender, size.location, photo.id)
const uploaded = await this.copyTelegramFile(telegramPOV, sender, size.location, photo.id)
uploaded.info.h = size.h
uploaded.info.w = size.w
uploaded.info.size = size.size
@@ -75,7 +76,7 @@ class Portal {
return uploaded
}
async copyFile(telegramPOV, sender, location, id) {
async copyTelegramFile(telegramPOV, sender, location, id) {
console.log(JSON.stringify(location, "", " "))
id = id || location.id
const file = await telegramPOV.getFile(location)
@@ -129,24 +130,43 @@ class Portal {
return this.peer.loadAccessHash(this.app, telegramPOV, { portal: this })
}
async handleTelegramEvent(evt) {
// FIXME room creation is disabled due to possibility of multiple messages causing duplicate rooms
//if (!this.isMatrixRoomCreated()) {
// await this.createMatrixRoom(evt.source, { invite: [evt.source.matrixUser.userID] })
//}
async handleTelegramTyping(evt) {
if (!this.isMatrixRoomCreated()) {
return
}
const typer = await this.app.getTelegramUser(evt.from)
// The Intent API currently doesn't allow you to set the
// typing timeout. Once it does, we should set it to ~5.5s
// as Telegram resends typing notifications every 5 seconds.
typer.intent.sendTyping(this.roomID, true/*, 5500*/)
}
async handleTelegramMessage(evt) {
if (!this.isMatrixRoomCreated()) {
// FIXME room creation is disabled due to possibility of multiple messages causing duplicate rooms
// await this.createMatrixRoom(evt.source, { invite: [evt.source.matrixUser.userID] })
console.warn("Room not created!", this)
return
}
const sender = await this.app.getTelegramUser(evt.from)
await sender.intent.sendTyping(this.roomID, false/*, 5500*/)
// TODO handle other content types
if (evt.text.length > 0) {
sender.sendText(this.roomID, evt.text)
if (evt.entities) {
evt.html = formatter.telegramToMatrix(evt.text, evt.entities)
sender.sendHTML(this.roomID, evt.html)
} else {
sender.sendText(this.roomID, evt.text)
}
}
if (evt.photo) {
const photo = await this.copyPhoto(evt.source, sender, evt.photo)
photo.name = evt.caption || "Photo"
const photo = await this.copyTelegramPhoto(evt.source, sender, evt.photo)
photo.name = evt.caption || "Uploaded photo"
sender.sendFile(this.roomID, photo)
} else if (evt.document) {
const file = await this.copyFile(evt.source, sender, evt.document)
file.name = evt.caption || "File upload"
// TODO handle stickers better
const file = await this.copyTelegramFile(evt.source, sender, evt.document)
file.name = evt.caption || "Uploaded document"
sender.sendFile(this.roomID, file)
} else if (evt.geo) {
sender.sendLocation(this.roomID, evt.geo)
@@ -154,11 +174,26 @@ class Portal {
}
async handleMatrixEvent(sender, evt) {
await this.loadAccessHash(sender.telegramPuppet)
switch (evt.content.msgtype) {
case "m.notice":
case "m.text":
await this.loadAccessHash(sender.telegramPuppet)
sender.telegramPuppet.sendMessage(this.peer, evt.content.body)
if (evt.content.format === "org.matrix.custom.html") {
const { message, entities } = formatter.matrixToTelegram(evt.content.formatted_body)
sender.telegramPuppet.sendMessage(this.peer, message, entities)
} else {
sender.telegramPuppet.sendMessage(this.peer, evt.content.body)
}
break
case "m.video":
case "m.audio":
case "m.file":
// TODO upload document
break
case "m.image":
break
case "m.geo":
// TODO send location
break
default:
console.log("Unhandled event:", evt)
+24 -15
View File
@@ -23,7 +23,6 @@ const TelegramPeer = require("./telegram-peer")
function metaFromFileType(type) {
const extension = type.substr("storage.file".length).toLowerCase()
let fileClass, mimetype, matrixtype
/*eslint no-fallthrough: "off"*/
switch (type) {
case "storage.fileGif":
case "storage.fileJpeg":
@@ -233,10 +232,11 @@ class TelegramPuppet {
}
}
async sendMessage(peer, message) {
async sendMessage(peer, message, entities = undefined) {
const result = await this.client("messages.sendMessage", {
peer: peer.toInputPeer(),
message,
entities,
random_id: ~~(Math.random() * (1 << 30)),
})
return result
@@ -259,28 +259,31 @@ class TelegramPuppet {
}
let to, from, portal
switch (update._) {
// Telegram user status handling.
case "updateUserStatus":
const user = await this.app.getTelegramUser(update.user_id)
const presence = update.status._ === "userStatusOnline" ? "online" : "offline"
await user.intent.getClient().setPresence({ presence })
return
//
// Telegram typing event handling
//
case "updateUserTyping":
to = new TelegramPeer("user", update.user_id, { receiverID: this.userID })
/* falls through */
case "updateChatUserTyping":
to = to || new TelegramPeer("chat", update.chat_id)
portal = await this.app.getPortalByPeer(to)
if (portal.isMatrixRoomCreated()) {
const sender = await this.app.getTelegramUser(update.user_id)
// The Intent API currently doesn't allow you to set the
// typing timeout. Once it does, we should set it to ~5.5s
// as Telegram resends typing notifications every 5 seconds.
await sender.intent.sendTyping(portal.roomID, true/*, 5500*/)
}
return
portal = await this.app.getPortalByPeer(to)
await portal.handleTelegramTyping({
from: update.user_id,
to,
source: this,
})
return
//
// The following cases are all messages. The actual handling happens after the switch.
// Telegram message handling/parsing.
// The actual handling happens after the switch.
//
case "updateShortMessage":
to = new TelegramPeer("user", update.user_id, { receiverID: this.userID })
@@ -298,17 +301,19 @@ class TelegramPuppet {
break
default:
console.log(`Update of type ${update._} received:\n${JSON.stringify(update, "", " ")}`)
// Unknown update type
console.log(`Update of unknown type ${update._} received:\n${JSON.stringify(update, "", " ")}`)
return
}
console.log(update)
// TODO handle other content types in updateNewMessage
portal = await this.app.getPortalByPeer(to)
await portal.handleTelegramEvent({
await portal.handleTelegramMessage({
from,
to,
source: this,
text: update.message,
entities: update.entities,
photo: update.media && update.media._ === "messageMediaPhoto"
? update.media.photo
: undefined,
@@ -394,6 +399,10 @@ class TelegramPuppet {
}, 1000)
}
// Placeholder with an empty body: calling it currently does nothing and
// resolves to undefined.
async uploadFile() {
}
async getFile(location) {
if (location.volume_id && location.local_id) {
location = {
+10
View File
@@ -13,6 +13,7 @@
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
const sanitizeHTML = require("sanitize-html")
const TelegramPeer = require("./telegram-peer")
/**
@@ -134,6 +135,15 @@ class TelegramUser {
return this.app.putUser(this)
}
sendHTML(roomID, html) {
return this.intent.sendMessage(roomID, {
msgtype: "m.text",
format: "org.matrix.custom.html",
formatted_body: html,
body: sanitizeHTML(html),
})
}
/**
 * Send a plaintext message to a Matrix room through this user's intent.
 *
 * @param   {string} roomID The ID of the Matrix room to send the message to.
 * @param   {string} text   The message text.
 * @returns {*}             Whatever `intent.sendText()` returns.
 */
sendText(roomID, text) {
return this.intent.sendText(roomID, text)
}