Better dedupe MUC history messages

This commit is contained in:
Philipp Hörist 2019-03-11 22:47:21 +01:00
parent 851a7ee929
commit f5fe4fd8de
2 changed files with 59 additions and 10 deletions

View File

@ -1234,6 +1234,53 @@ class Logger:
self._con.executescript(sql)
self._timeout_commit()
def deduplicate_muc_message(self, account, jid, resource,
                            timestamp, message_id):
    """
    Check if a MUC message is already in the `logs` table

    A row counts as a duplicate when its jid, contact name (the
    resource), message-id and account id all match and its time lies
    within 60 seconds (inclusive) of `timestamp`. The message text is
    not part of the comparison.

    :param account:     The account
    :param jid:         The muc jid as string
    :param resource:    The resource
    :param timestamp:   The timestamp in UTC epoch
    :param message_id:  The message-id

    :returns:           True if a matching row exists, otherwise False
    """
    # Add 60 seconds around the timestamp
    start_time = timestamp - 60
    end_time = timestamp + 60

    account_id = self.get_account_id(account)
    log.debug('Search for MUC duplicate')
    log.debug('start: %s, end: %s, jid: %s, resource: %s, message-id: %s',
              start_time, end_time, jid, resource, message_id)

    # Only existence matters, so select a constant and stop at the
    # first hit instead of fetching every column of the joined row
    sql = '''
        SELECT 1 FROM logs
        NATURAL JOIN jids WHERE
        jid = ? AND
        contact_name = ? AND
        message_id = ? AND
        account_id = ? AND
        time BETWEEN ? AND ?
        LIMIT 1
        '''

    params = (jid, resource, message_id, account_id, start_time, end_time)
    is_duplicate = self._con.execute(sql, params).fetchone() is not None
    if is_duplicate:
        log.debug('Found duplicate')
    return is_duplicate
def search_for_duplicate(self, account, jid, timestamp, msg):
"""
Check if a message is already in the `logs` table

View File

@ -85,18 +85,20 @@ class Message(BaseModule):
type_ = properties.type
# Check for duplicates
stanza_id, message_id = self._get_unique_id(properties)
# Check groupchat messages for duplicates,
# We do this because of MUC History messages
if (properties.type.is_groupchat or
properties.is_self_message or
properties.is_muc_pm):
if properties.type.is_groupchat:
archive_jid = stanza.getFrom().getStripped()
else:
archive_jid = self._con.get_own_jid().getStripped()
if properties.type.is_groupchat and properties.has_server_delay:
# Only for XEP-0045 MUC History
# Don't check the message text, because the message could be encrypted
if app.logger.deduplicate_muc_message(self._account,
properties.jid.getBare(),
properties.jid.getResource(),
properties.timestamp,
properties.id):
raise nbxmpp.NodeProcessed
if (properties.is_self_message or properties.is_muc_pm):
archive_jid = self._con.get_own_jid().getStripped()
if app.logger.find_stanza_id(self._account,
archive_jid,
stanza_id,