Improve deduplication

A stanza-id is only unique within a specific archive.
This adds the archive JID to the deduplication query.
This commit is contained in:
Philipp Hörist 2017-11-18 17:40:09 +01:00
parent cac1185a23
commit 7f1776ede9
2 changed files with 27 additions and 9 deletions

View file

@ -1062,7 +1062,7 @@ class MamMessageReceivedEvent(nec.NetworkIncomingEvent, HelperEvent):
def generate(self):
archive_jid = self.stanza.getFrom()
own_jid = self.conn.get_own_jid()
own_jid = self.conn.get_own_jid().getStripped()
if archive_jid and not archive_jid.bareMatch(own_jid):
# MAM Message not from our Archive
return False
@ -1076,7 +1076,7 @@ class MamMessageReceivedEvent(nec.NetworkIncomingEvent, HelperEvent):
self.unique_id, origin_id = self.get_unique_id()
# Check for duplicates
if app.logger.find_stanza_id(self.unique_id, origin_id):
if app.logger.find_stanza_id(own_jid, self.unique_id, origin_id):
return
self.msgtxt = self.msg_.getTagData('body')
@ -1150,16 +1150,18 @@ class MamGcMessageReceivedEvent(nec.NetworkIncomingEvent, HelperEvent):
self.kind = KindConstant.GC_MSG
def generate(self):
self.room_jid = self.stanza.getFrom()
self.msg_ = self.forwarded.getTag('message', protocol=True)
if self.msg_.getType() != 'groupchat':
return False
self.room_jid = self.stanza.getFrom().getStripped()
self.unique_id = self.get_stanza_id(self.result, query=True)
# Check for duplicates
if app.logger.find_stanza_id(self.unique_id):
if app.logger.find_stanza_id(self.room_jid, self.unique_id,
groupchat=True):
return
self.msgtxt = self.msg_.getTagData('body')
@ -1282,7 +1284,9 @@ class MessageReceivedEvent(nec.NetworkIncomingEvent, HelperEvent):
# Check groupchat messages for duplicates,
# We do this because of MUC History messages
if self.stanza.getType() == 'groupchat':
if app.logger.find_stanza_id(self.unique_id):
if app.logger.find_stanza_id(self.stanza.getFrom().getStripped(),
self.unique_id,
groupchat=True):
return
address_tag = self.stanza.getTag('addresses',

View file

@ -1088,12 +1088,19 @@ class Logger:
return True
return False
def find_stanza_id(self, stanza_id, origin_id=None):
def find_stanza_id(self, archive_jid, stanza_id, origin_id=None,
groupchat=False):
"""
Checks if a stanza-id is already in the `logs` table
:param archive_jid: The jid of the archive the stanza-id belongs to
:param stanza_id: The stanza-id
:param origin_id: The origin-id
:param groupchat: stanza-id is from a groupchat
return True if the stanza-id was found
"""
ids = []
@ -1105,12 +1112,19 @@ class Logger:
if not ids:
return False
archive_id = self.get_jid_id(archive_jid)
if groupchat:
column = 'jid_id'
else:
column = 'account_id'
sql = '''
SELECT stanza_id FROM logs
WHERE stanza_id IN ({values}) LIMIT 1
'''.format(values=', '.join('?' * len(ids)))
WHERE stanza_id IN ({values}) AND {archive} = ? LIMIT 1
'''.format(values=', '.join('?' * len(ids)),
archive=column)
result = self.con.execute(sql, tuple(ids)).fetchone()
result = self.con.execute(sql, tuple(ids) + (archive_id,)).fetchone()
if result is not None:
log.info('Found duplicated message, stanza-id: %s, origin-id: %s',