Improve deduplication
A stanza-id is only unique within a specific archive. This adds the archive JID to the query.
This commit is contained in:
parent
cac1185a23
commit
7f1776ede9
2 changed files with 27 additions and 9 deletions
|
@ -1062,7 +1062,7 @@ class MamMessageReceivedEvent(nec.NetworkIncomingEvent, HelperEvent):
|
|||
|
||||
def generate(self):
|
||||
archive_jid = self.stanza.getFrom()
|
||||
own_jid = self.conn.get_own_jid()
|
||||
own_jid = self.conn.get_own_jid().getStripped()
|
||||
if archive_jid and not archive_jid.bareMatch(own_jid):
|
||||
# MAM Message not from our Archive
|
||||
return False
|
||||
|
@ -1076,7 +1076,7 @@ class MamMessageReceivedEvent(nec.NetworkIncomingEvent, HelperEvent):
|
|||
self.unique_id, origin_id = self.get_unique_id()
|
||||
|
||||
# Check for duplicates
|
||||
if app.logger.find_stanza_id(self.unique_id, origin_id):
|
||||
if app.logger.find_stanza_id(own_jid, self.unique_id, origin_id):
|
||||
return
|
||||
|
||||
self.msgtxt = self.msg_.getTagData('body')
|
||||
|
@ -1150,16 +1150,18 @@ class MamGcMessageReceivedEvent(nec.NetworkIncomingEvent, HelperEvent):
|
|||
self.kind = KindConstant.GC_MSG
|
||||
|
||||
def generate(self):
|
||||
self.room_jid = self.stanza.getFrom()
|
||||
self.msg_ = self.forwarded.getTag('message', protocol=True)
|
||||
|
||||
if self.msg_.getType() != 'groupchat':
|
||||
return False
|
||||
|
||||
self.room_jid = self.stanza.getFrom().getStripped()
|
||||
|
||||
self.unique_id = self.get_stanza_id(self.result, query=True)
|
||||
|
||||
# Check for duplicates
|
||||
if app.logger.find_stanza_id(self.unique_id):
|
||||
if app.logger.find_stanza_id(self.room_jid, self.unique_id,
|
||||
groupchat=True):
|
||||
return
|
||||
|
||||
self.msgtxt = self.msg_.getTagData('body')
|
||||
|
@ -1282,7 +1284,9 @@ class MessageReceivedEvent(nec.NetworkIncomingEvent, HelperEvent):
|
|||
# Check groupchat messages for duplicates,
|
||||
# We do this because of MUC History messages
|
||||
if self.stanza.getType() == 'groupchat':
|
||||
if app.logger.find_stanza_id(self.unique_id):
|
||||
if app.logger.find_stanza_id(self.stanza.getFrom().getStripped(),
|
||||
self.unique_id,
|
||||
groupchat=True):
|
||||
return
|
||||
|
||||
address_tag = self.stanza.getTag('addresses',
|
||||
|
|
|
@ -1088,12 +1088,19 @@ class Logger:
|
|||
return True
|
||||
return False
|
||||
|
||||
def find_stanza_id(self, stanza_id, origin_id=None):
|
||||
def find_stanza_id(self, archive_jid, stanza_id, origin_id=None,
|
||||
groupchat=False):
|
||||
"""
|
||||
Checks if a stanza-id is already in the `logs` table
|
||||
|
||||
:param archive_jid: The jid of the archive the stanza-id belongs to
|
||||
|
||||
:param stanza_id: The stanza-id
|
||||
|
||||
:param origin_id: The origin-id
|
||||
|
||||
:param groupchat: stanza-id is from a groupchat
|
||||
|
||||
return True if the stanza-id was found
|
||||
"""
|
||||
ids = []
|
||||
|
@ -1105,12 +1112,19 @@ class Logger:
|
|||
if not ids:
|
||||
return False
|
||||
|
||||
archive_id = self.get_jid_id(archive_jid)
|
||||
if groupchat:
|
||||
column = 'jid_id'
|
||||
else:
|
||||
column = 'account_id'
|
||||
|
||||
sql = '''
|
||||
SELECT stanza_id FROM logs
|
||||
WHERE stanza_id IN ({values}) LIMIT 1
|
||||
'''.format(values=', '.join('?' * len(ids)))
|
||||
WHERE stanza_id IN ({values}) AND {archive} = ? LIMIT 1
|
||||
'''.format(values=', '.join('?' * len(ids)),
|
||||
archive=column)
|
||||
|
||||
result = self.con.execute(sql, tuple(ids)).fetchone()
|
||||
result = self.con.execute(sql, tuple(ids) + (archive_id,)).fetchone()
|
||||
|
||||
if result is not None:
|
||||
log.info('Found duplicated message, stanza-id: %s, origin-id: %s',
|
||||
|
|
Loading…
Add table
Reference in a new issue