diff --git a/.eslintrc.json b/.eslintrc.json index 3d81002a..2c5d8498 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -169,6 +169,7 @@ "no-case-declarations": "off", "no-template-curly-in-string": "off", "no-await-in-loop": "off", - "no-restricted-globals": "off" + "no-restricted-globals": "off", + "no-fallthrough": "off" } } diff --git a/README.md b/README.md index 815cb277..f5176057 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,7 @@ A Telegram chat will be created once the bridge is stable enough. ## Features & Roadmap * Matrix → Telegram * [x] Plaintext messages + * [x] Formatted messages * [ ] Images * [ ] Files * [ ] Message redactions @@ -36,6 +37,7 @@ A Telegram chat will be created once the bridge is stable enough. * [ ] Power level * Telegram → Matrix * [x] Plaintext messages + * [x] Formatted messages * [x] Images * [ ] Stickers (somewhat works through document upload, no preview though) * [x] Audio messages diff --git a/src/formatter.js b/src/formatter.js new file mode 100644 index 00000000..282d8573 --- /dev/null +++ b/src/formatter.js @@ -0,0 +1,212 @@ +// mautrix-telegram - A Matrix-Telegram puppeting bridge +// Copyright (C) 2017 Tulir Asokan +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. + +/** + * Utility functions to convert between Telegram and Matrix (HTML) formatting. + * + * WARNING: This module contains headache-causing regular expressions and other duct tape. 
/**
 * @module formatter
 */

/**
 * Return a copy of this string with `str` spliced in at index `at`.
 *
 * NOTE(review): extending String.prototype is generally discouraged. The patch is kept
 * because code outside this block may call it; telegramToMatrix() does not rely on it.
 *
 * @param {number} at  The index to insert at.
 * @param {string} str The text to insert.
 * @returns {string} The resulting string.
 */
String.prototype.insert = function(at, str) {
	return this.slice(0, at) + str + this.slice(at)
}

/**
 * Escape the characters that are significant in HTML text and double-quoted attribute values.
 *
 * @param {*} text The value to escape; it is converted to a string first.
 * @returns {string} The escaped string.
 */
function escapeHtml(text) {
	return String(text)
		.replace(/&/g, "&amp;")
		.replace(/</g, "&lt;")
		.replace(/>/g, "&gt;")
		.replace(/"/g, "&quot;")
}

/**
 * Queue an attribute-less opening/closing tag pair covering the given entity.
 *
 * Each queued item is `[index, markup, sortKey]`. The opening tag gets `-priority` and the
 * closing tag gets `priority`, which is what telegramToMatrix() sorts on.
 *
 * @param {Array}  tags     The tag queue to append to (mutated).
 * @param {Object} entity   An object with numeric `offset` and `length` fields.
 * @param {string} tag      The HTML tag name.
 * @param {number} priority The ordering priority for tags that share an index.
 */
function addSimpleTag(tags, entity, tag, priority = 0) {
	tags.push([entity.offset, `<${tag}>`, -priority])
	tags.push([entity.offset + entity.length, `</${tag}>`, priority])
}

/**
 * Queue an opening/closing tag pair with attributes covering the given entity.
 *
 * `attrs` is written into the opening tag verbatim, so the caller must escape any
 * untrusted attribute value beforehand.
 *
 * @param {Array}  tags     The tag queue to append to (mutated).
 * @param {Object} entity   An object with numeric `offset` and `length` fields.
 * @param {string} tag      The HTML tag name.
 * @param {string} attrs    The raw attribute string for the opening tag.
 * @param {number} priority The ordering priority for tags that share an index.
 */
function addTag(tags, entity, tag, attrs, priority = 0) {
	tags.push([entity.offset, `<${tag} ${attrs}>`, -priority])
	tags.push([entity.offset + entity.length, `</${tag}>`, priority])
}

/**
 * Work out the link target for an email, URL or text-URL entity.
 *
 * @param {string} message The plaintext message the entity refers to.
 * @param {Object} entity  The link entity.
 * @returns {string} The unescaped link target.
 */
function entityUrl(message, entity) {
	const text = message.slice(entity.offset, entity.offset + entity.length)
	switch (entity._) {
	case "messageEntityEmail":
		return `mailto:${text}`
	case "messageEntityUrl":
		// Fall back to an explicit URL if the covered text is empty.
		return text || entity.url
	default:
		return entity.url
	}
}

/**
 * Convert a Telegram entity-formatted message to a Matrix HTML-formatted message.
 *
 * The message text and every attribute value taken from the message or its entities are
 * HTML-escaped, so the only markup in the result is what this function generates.
 * Entity types that are not listed in the switch below are ignored.
 *
 * @param {string} message  The plaintext message.
 * @param {Array}  entities The Telegram formatting entities. May be omitted or null.
 * @returns {string} The message as HTML.
 */
function telegramToMatrix(message, entities) {
	const tags = []
	// Decreasing priority counter. Every entity gets its own value, which keeps the tags of
	// adjacent entities from interleaving: *bold*_italic_ must become
	// <strong>bold</strong><em>italic</em>, not <strong>bold<em></strong>italic</em>.
	let pc = 9001

	for (const entity of entities || []) {
		switch (entity._) {
		case "messageEntityBold":
			addSimpleTag(tags, entity, "strong", --pc)
			break
		case "messageEntityItalic":
			addSimpleTag(tags, entity, "em", --pc)
			break
		case "messageEntityCode":
			addSimpleTag(tags, entity, "code", --pc)
			break
		case "messageEntityPre":
			pc--
			addSimpleTag(tags, entity, "pre", pc)
			// The inner <code> uses pc + 1 so that it nests inside the <pre>.
			if (entity.language) {
				addTag(tags, entity, "code", `class="language-${escapeHtml(entity.language)}"`, pc + 1)
			} else {
				// No language given: emit a bare <code> instead of class="language-undefined".
				addSimpleTag(tags, entity, "code", pc + 1)
			}
			break
		case "messageEntityHashtag":
		case "messageEntityBotCommand":
			// TODO bridge bot commands differently?
			addTag(tags, entity, "font", "color=\"blue\"", --pc)
			break
		case "messageEntityMention":
			// TODO bridge mentions properly?
			addTag(tags, entity, "font", "color=\"red\"", --pc)
			break
		case "messageEntityEmail":
		case "messageEntityUrl":
		case "messageEntityTextUrl":
			addTag(tags, entity, "a", `href="${escapeHtml(entityUrl(message, entity))}"`, --pc)
			break
		}
	}

	// Sort last index first, then by ascending sort key. The output is assembled back to
	// front, so among tags at the same index the one sorted first ends up rightmost. That
	// puts closing tags (positive keys) to the left of opening tags (negative keys).
	tags.sort(([aIndex, , aPriority], [bIndex, , bPriority]) => bIndex - aIndex || aPriority - bPriority)

	// Walk the tags from the end of the message towards the start, escaping the plain text
	// between consecutive tag positions and prepending each tag.
	let html = ""
	let cursor = message.length
	for (const [index, markup] of tags) {
		// Clamp so that an out-of-range offset cannot duplicate or drop text.
		const at = Math.max(0, Math.min(index, cursor))
		html = markup + escapeHtml(message.slice(at, cursor)) + html
		cursor = at
	}
	return escapeHtml(message.slice(0, cursor)) + html
}

// Formatting that is converted back to text +const paragraphs = /

(.*?)<\/p>/g +const headers = /(.*?)<\/h[0-6]>/g +const unorderedLists = /