Implement message deduplication. Fixes #5
This commit is contained in:
@@ -14,15 +14,20 @@
|
|||||||
#
|
#
|
||||||
# You should have received a copy of the GNU General Public License
|
# You should have received a copy of the GNU General Public License
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
from io import BytesIO
|
||||||
|
from collections import deque
|
||||||
|
from datetime import datetime
|
||||||
|
import mimetypes
|
||||||
|
import hashlib
|
||||||
|
|
||||||
|
from PIL import Image
|
||||||
|
import magic
|
||||||
|
|
||||||
from telethon.tl.functions.messages import *
|
from telethon.tl.functions.messages import *
|
||||||
from telethon.tl.functions.channels import *
|
from telethon.tl.functions.channels import *
|
||||||
from telethon.errors.rpc_error_list import *
|
from telethon.errors.rpc_error_list import *
|
||||||
from telethon.tl.types import *
|
from telethon.tl.types import *
|
||||||
from PIL import Image
|
|
||||||
from io import BytesIO
|
|
||||||
from datetime import datetime
|
|
||||||
import mimetypes
|
|
||||||
import magic
|
|
||||||
from .db import Portal as DBPortal, Message as DBMessage
|
from .db import Portal as DBPortal, Message as DBMessage
|
||||||
from . import puppet as p, user as u, formatter
|
from . import puppet as p, user as u, formatter
|
||||||
|
|
||||||
@@ -50,6 +55,8 @@ class Portal:
|
|||||||
self.photo_id = photo_id
|
self.photo_id = photo_id
|
||||||
self._main_intent = None
|
self._main_intent = None
|
||||||
|
|
||||||
|
self._dedup = deque()
|
||||||
|
|
||||||
if tgid:
|
if tgid:
|
||||||
self.by_tgid[self.tgid_full] = self
|
self.by_tgid[self.tgid_full] = self
|
||||||
if mxid:
|
if mxid:
|
||||||
@@ -74,6 +81,43 @@ class Portal:
|
|||||||
elif self.peer_type == "channel":
|
elif self.peer_type == "channel":
|
||||||
return PeerChannel(channel_id=self.tgid)
|
return PeerChannel(channel_id=self.tgid)
|
||||||
|
|
||||||
|
def _hash_event(self, event):
    """Return a key identifying ``event`` for deduplication purposes.

    For portals whose ``peer_type`` is ``"channel"`` the key is simply
    ``event.id``. For every other peer type the key is the hex MD5 digest
    of a "-"-joined string built from the message timestamp, sender,
    text, forward origin (if any) and a media identifier (if the media
    type is one of the recognised kinds).

    :param event: The Telegram message event. It must expose ``id``,
        ``date``, ``from_id``, ``message``, ``fwd_from`` and ``media``.
    :return: ``event.id`` for channels, otherwise a hex digest string.
    """
    if self.peer_type == "channel":
        # Message IDs are unique per-channel
        return event.id

    # Non-channel messages are unique per-user (wtf telegram), so we have no other choice than
    # to deduplicate based on a hash of the message content.

    # The timestamp is only accurate to the second, so we can't rely solely on that either.
    hash_content = [str(event.date.timestamp()), event.from_id, event.message]

    if event.fwd_from:
        hash_content += [event.fwd_from.from_id, event.fwd_from.channel_id]

    # Media is hashed for forwarded messages as well: several captionless
    # media items forwarded from the same source within one second share
    # every other field, so without the media identifier they would all
    # produce the same key and be dropped as duplicates of the first.
    if event.media:
        media_fields = {
            MessageMediaContact: lambda media: [media.user_id],
            MessageMediaDocument: lambda media: [media.document.id, media.caption],
            MessageMediaPhoto: lambda media: [media.photo.id, media.caption],
            MessageMediaGeo: lambda media: [media.geo.long, media.geo.lat],
        }.get(type(event.media))
        # Unrecognised media types contribute nothing extra to the key.
        if media_fields is not None:
            hash_content += media_fields(event.media)

    # MD5 is used purely as a compact fingerprint here, not for security.
    return hashlib.md5(
        "-".join(str(part) for part in hash_content).encode("utf-8")
    ).hexdigest()
|
||||||
|
|
||||||
|
def is_duplicate(self, event):
    """Tell whether ``event`` has been seen recently, recording it if not.

    The event's deduplication key is obtained from ``self._hash_event``
    and looked up in ``self._dedup``. An unseen key is appended, and the
    oldest key is discarded once more than 20 are stored.

    :param event: The event to check.
    :return: ``True`` if the key was already stored, ``False`` otherwise.
    """
    event_key = self._hash_event(event)
    recent_keys = self._dedup

    seen_before = event_key in recent_keys
    if not seen_before:
        recent_keys.append(event_key)
        # Only the 20 most recent keys are remembered.
        if len(recent_keys) > 20:
            recent_keys.popleft()
    return seen_before
|
||||||
|
|
||||||
def get_input_entity(self, user):
|
def get_input_entity(self, user):
|
||||||
return user.client.get_input_entity(self.peer)
|
return user.client.get_input_entity(self.peer)
|
||||||
|
|
||||||
@@ -365,6 +409,7 @@ class Portal:
|
|||||||
else:
|
else:
|
||||||
self.log.debug("Unhandled Matrix event: %s", message)
|
self.log.debug("Unhandled Matrix event: %s", message)
|
||||||
return
|
return
|
||||||
|
self.is_duplicate(response)
|
||||||
self.db.add(
|
self.db.add(
|
||||||
DBMessage(tgid=response.id, mx_room=self.mxid, mxid=event_id, user=sender.tgid))
|
DBMessage(tgid=response.id, mx_room=self.mxid, mxid=event_id, user=sender.tgid))
|
||||||
self.db.commit()
|
self.db.commit()
|
||||||
@@ -631,6 +676,9 @@ class Portal:
|
|||||||
if not self.mxid:
|
if not self.mxid:
|
||||||
self.create_matrix_room(source, invites=[source.mxid])
|
self.create_matrix_room(source, invites=[source.mxid])
|
||||||
|
|
||||||
|
if self.is_duplicate(evt):
|
||||||
|
return
|
||||||
|
|
||||||
if evt.message:
|
if evt.message:
|
||||||
response = self.handle_telegram_text(source, sender, evt)
|
response = self.handle_telegram_text(source, sender, evt)
|
||||||
elif evt.media:
|
elif evt.media:
|
||||||
|
|||||||
Reference in New Issue
Block a user