Better dedupe MUC history messages
This commit is contained in:
		
							parent
							
								
									851a7ee929
								
							
						
					
					
						commit
						f5fe4fd8de
					
				
					 2 changed files with 59 additions and 10 deletions
				
			
		|  | @ -1234,6 +1234,53 @@ class Logger: | |||
|         self._con.executescript(sql) | ||||
|         self._timeout_commit() | ||||
| 
 | ||||
|     def deduplicate_muc_message(self, account, jid, resource, | ||||
|                                 timestamp, message_id): | ||||
|         """ | ||||
|         Check if a message is already in the `logs` table | ||||
| 
 | ||||
|         :param account:     The account | ||||
| 
 | ||||
|         :param jid:         The muc jid as string | ||||
| 
 | ||||
|         :param resource:    The resource | ||||
| 
 | ||||
|         :param timestamp:   The timestamp in UTC epoch | ||||
| 
 | ||||
|         :param message_id:  The message-id | ||||
|         """ | ||||
| 
 | ||||
|         # Add 60 seconds around the timestamp | ||||
|         start_time = timestamp - 60 | ||||
|         end_time = timestamp + 60 | ||||
| 
 | ||||
|         account_id = self.get_account_id(account) | ||||
|         log.debug('Search for MUC duplicate') | ||||
|         log.debug('start: %s, end: %s, jid: %s, resource: %s, message-id: %s', | ||||
|                   start_time, end_time, jid, resource, message_id) | ||||
| 
 | ||||
|         sql = ''' | ||||
|             SELECT * FROM logs | ||||
|             NATURAL JOIN jids WHERE | ||||
|             jid = ? AND | ||||
|             contact_name = ? AND | ||||
|             message_id = ? AND | ||||
|             account_id = ? AND | ||||
|             time BETWEEN ? AND ? | ||||
|             ''' | ||||
| 
 | ||||
|         result = self._con.execute(sql, (jid, | ||||
|                                          resource, | ||||
|                                          message_id, | ||||
|                                          account_id, | ||||
|                                          start_time, | ||||
|                                          end_time)).fetchone() | ||||
| 
 | ||||
|         if result is not None: | ||||
|             log.debug('Found duplicate') | ||||
|             return True | ||||
|         return False | ||||
| 
 | ||||
|     def search_for_duplicate(self, account, jid, timestamp, msg): | ||||
|         """ | ||||
|         Check if a message is already in the `logs` table | ||||
|  |  | |||
|  | @ -85,18 +85,20 @@ class Message(BaseModule): | |||
| 
 | ||||
|         type_ = properties.type | ||||
| 
 | ||||
|         # Check for duplicates | ||||
|         stanza_id, message_id = self._get_unique_id(properties) | ||||
| 
 | ||||
|         # Check groupchat messages for duplicates, | ||||
|         # We do this because of MUC History messages | ||||
|         if (properties.type.is_groupchat or | ||||
|                 properties.is_self_message or | ||||
|                 properties.is_muc_pm): | ||||
|             if properties.type.is_groupchat: | ||||
|                 archive_jid = stanza.getFrom().getStripped() | ||||
|             else: | ||||
|                 archive_jid = self._con.get_own_jid().getStripped() | ||||
|         if properties.type.is_groupchat and properties.has_server_delay: | ||||
|             # Only for XEP-0045 MUC History | ||||
|             # Don't check for message text because the message could be encrypted | ||||
|             if app.logger.deduplicate_muc_message(self._account, | ||||
|                                                   properties.jid.getBare(), | ||||
|                                                   properties.jid.getResource(), | ||||
|                                                   properties.timestamp, | ||||
|                                                   properties.id): | ||||
|                 raise nbxmpp.NodeProcessed | ||||
| 
 | ||||
|         if (properties.is_self_message or properties.is_muc_pm): | ||||
|             archive_jid = self._con.get_own_jid().getStripped() | ||||
|             if app.logger.find_stanza_id(self._account, | ||||
|                                          archive_jid, | ||||
|                                          stanza_id, | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue