Handle surrogates in a non-hacky way

This commit is contained in:
Tulir Asokan
2018-02-19 20:53:37 +02:00
parent 51b868d9ce
commit 092b80ad02
3 changed files with 28 additions and 21 deletions
+16
View File
@@ -0,0 +1,16 @@
# Unicode surrogate handling
# From https://github.com/LonamiWebs/Telethon/blob/master/telethon/extensions/markdown.py
import struct
def add_surrogates(text):
if text is None:
return None
return "".join("".join(chr(y) for y in struct.unpack("<HH", x.encode("utf-16-le")))
if (0x10000 <= ord(x) <= 0x10FFFF) else x for x in text)
def remove_surrogates(text):
if text is None:
return None
return text.encode("utf-16", "surrogatepass").decode("utf-16")