Replace Unicode non-characters with the replacement character (\ufffd) as soon as we receive them. Fixes #6974

This commit is contained in:
Yann Leboulanger 2011-09-15 22:00:41 +02:00
parent d74ab91981
commit e5bb7672c2
1 changed files with 20 additions and 0 deletions

View File

@ -21,6 +21,7 @@ different handlers to different XMPP stanzas and namespaces
""" """
import simplexml, sys, locale import simplexml, sys, locale
import re
from xml.parsers.expat import ExpatError from xml.parsers.expat import ExpatError
from plugin import PlugIn from plugin import PlugIn
from protocol import (NS_STREAMS, NS_XMPP_STREAMS, NS_HTTP_BIND, Iq, Presence, from protocol import (NS_STREAMS, NS_XMPP_STREAMS, NS_HTTP_BIND, Iq, Presence,
@ -93,6 +94,24 @@ class XMPPDispatcher(PlugIn):
# Let the dispatcher know if there is support for stream management # Let the dispatcher know if there is support for stream management
self.sm = None self.sm = None
# \ufdd0 -> \ufdef range
c = u'\ufdd0'
r = c.encode('utf8')
while (c < u'\ufdef'):
c = unichr(ord(c) + 1)
r += '|' + c.encode('utf8')
# \ufffe-\uffff, \U0001fffe-\U0001ffff, ..., \U0010fffe-\U0010ffff
c = u'\ufffe'
r += '|' + c.encode('utf8')
r += '|' + unichr(ord(c) + 1).encode('utf8')
while (c < u'\U0010fffe'):
c = unichr(ord(c) + 0x10000)
r += '|' + c.encode('utf8')
r += '|' + unichr(ord(c) + 1).encode('utf8')
self.invalid_chars_re = re.compile(r)
def getAnID(self): def getAnID(self):
global outgoingID global outgoingID
outgoingID += 1 outgoingID += 1
@ -193,6 +212,7 @@ class XMPPDispatcher(PlugIn):
# disconnect method will never be called. # disconnect method will never be called.
# Is this intended? # Is this intended?
# also look at transports start_disconnect() # also look at transports start_disconnect()
data = re.sub(self.invalid_chars_re, u'\ufffd'.encode('utf-8'), data)
for handler in self._cycleHandlers: for handler in self._cycleHandlers:
handler(self) handler(self)
if len(self._pendingExceptions) > 0: if len(self._pendingExceptions) > 0: