Improve deduplication
A stanza-id is only unique within a specific archive. This adds the archive JID to the query.
This commit is contained in:
parent
cac1185a23
commit
7f1776ede9
2 changed files with 27 additions and 9 deletions
|
@ -1062,7 +1062,7 @@ class MamMessageReceivedEvent(nec.NetworkIncomingEvent, HelperEvent):
|
||||||
|
|
||||||
def generate(self):
|
def generate(self):
|
||||||
archive_jid = self.stanza.getFrom()
|
archive_jid = self.stanza.getFrom()
|
||||||
own_jid = self.conn.get_own_jid()
|
own_jid = self.conn.get_own_jid().getStripped()
|
||||||
if archive_jid and not archive_jid.bareMatch(own_jid):
|
if archive_jid and not archive_jid.bareMatch(own_jid):
|
||||||
# MAM Message not from our Archive
|
# MAM Message not from our Archive
|
||||||
return False
|
return False
|
||||||
|
@ -1076,7 +1076,7 @@ class MamMessageReceivedEvent(nec.NetworkIncomingEvent, HelperEvent):
|
||||||
self.unique_id, origin_id = self.get_unique_id()
|
self.unique_id, origin_id = self.get_unique_id()
|
||||||
|
|
||||||
# Check for duplicates
|
# Check for duplicates
|
||||||
if app.logger.find_stanza_id(self.unique_id, origin_id):
|
if app.logger.find_stanza_id(own_jid, self.unique_id, origin_id):
|
||||||
return
|
return
|
||||||
|
|
||||||
self.msgtxt = self.msg_.getTagData('body')
|
self.msgtxt = self.msg_.getTagData('body')
|
||||||
|
@ -1150,16 +1150,18 @@ class MamGcMessageReceivedEvent(nec.NetworkIncomingEvent, HelperEvent):
|
||||||
self.kind = KindConstant.GC_MSG
|
self.kind = KindConstant.GC_MSG
|
||||||
|
|
||||||
def generate(self):
|
def generate(self):
|
||||||
self.room_jid = self.stanza.getFrom()
|
|
||||||
self.msg_ = self.forwarded.getTag('message', protocol=True)
|
self.msg_ = self.forwarded.getTag('message', protocol=True)
|
||||||
|
|
||||||
if self.msg_.getType() != 'groupchat':
|
if self.msg_.getType() != 'groupchat':
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
self.room_jid = self.stanza.getFrom().getStripped()
|
||||||
|
|
||||||
self.unique_id = self.get_stanza_id(self.result, query=True)
|
self.unique_id = self.get_stanza_id(self.result, query=True)
|
||||||
|
|
||||||
# Check for duplicates
|
# Check for duplicates
|
||||||
if app.logger.find_stanza_id(self.unique_id):
|
if app.logger.find_stanza_id(self.room_jid, self.unique_id,
|
||||||
|
groupchat=True):
|
||||||
return
|
return
|
||||||
|
|
||||||
self.msgtxt = self.msg_.getTagData('body')
|
self.msgtxt = self.msg_.getTagData('body')
|
||||||
|
@ -1282,7 +1284,9 @@ class MessageReceivedEvent(nec.NetworkIncomingEvent, HelperEvent):
|
||||||
# Check groupchat messages for duplicates,
|
# Check groupchat messages for duplicates,
|
||||||
# We do this because of MUC History messages
|
# We do this because of MUC History messages
|
||||||
if self.stanza.getType() == 'groupchat':
|
if self.stanza.getType() == 'groupchat':
|
||||||
if app.logger.find_stanza_id(self.unique_id):
|
if app.logger.find_stanza_id(self.stanza.getFrom().getStripped(),
|
||||||
|
self.unique_id,
|
||||||
|
groupchat=True):
|
||||||
return
|
return
|
||||||
|
|
||||||
address_tag = self.stanza.getTag('addresses',
|
address_tag = self.stanza.getTag('addresses',
|
||||||
|
|
|
@ -1088,12 +1088,19 @@ class Logger:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def find_stanza_id(self, stanza_id, origin_id=None):
|
def find_stanza_id(self, archive_jid, stanza_id, origin_id=None,
|
||||||
|
groupchat=False):
|
||||||
"""
|
"""
|
||||||
Checks if a stanza-id is already in the `logs` table
|
Checks if a stanza-id is already in the `logs` table
|
||||||
|
|
||||||
|
:param archive_jid: The jid of the archive the stanza-id belongs to
|
||||||
|
|
||||||
:param stanza_id: The stanza-id
|
:param stanza_id: The stanza-id
|
||||||
|
|
||||||
|
:param origin_id: The origin-id
|
||||||
|
|
||||||
|
:param groupchat: stanza-id is from a groupchat
|
||||||
|
|
||||||
return True if the stanza-id was found
|
return True if the stanza-id was found
|
||||||
"""
|
"""
|
||||||
ids = []
|
ids = []
|
||||||
|
@ -1105,12 +1112,19 @@ class Logger:
|
||||||
if not ids:
|
if not ids:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
archive_id = self.get_jid_id(archive_jid)
|
||||||
|
if groupchat:
|
||||||
|
column = 'jid_id'
|
||||||
|
else:
|
||||||
|
column = 'account_id'
|
||||||
|
|
||||||
sql = '''
|
sql = '''
|
||||||
SELECT stanza_id FROM logs
|
SELECT stanza_id FROM logs
|
||||||
WHERE stanza_id IN ({values}) LIMIT 1
|
WHERE stanza_id IN ({values}) AND {archive} = ? LIMIT 1
|
||||||
'''.format(values=', '.join('?' * len(ids)))
|
'''.format(values=', '.join('?' * len(ids)),
|
||||||
|
archive=column)
|
||||||
|
|
||||||
result = self.con.execute(sql, tuple(ids)).fetchone()
|
result = self.con.execute(sql, tuple(ids) + (archive_id,)).fetchone()
|
||||||
|
|
||||||
if result is not None:
|
if result is not None:
|
||||||
log.info('Found duplicated message, stanza-id: %s, origin-id: %s',
|
log.info('Found duplicated message, stanza-id: %s, origin-id: %s',
|
||||||
|
|
Loading…
Add table
Reference in a new issue