transform non-character unicode to ? char (\ufffd) as soon as we receive them. Fixes #6974

This commit is contained in:
Yann Leboulanger 2011-09-15 22:00:41 +02:00
parent d74ab91981
commit e5bb7672c2
1 changed files with 20 additions and 0 deletions

View File

@ -21,6 +21,7 @@ different handlers to different XMPP stanzas and namespaces
"""
import simplexml, sys, locale
import re
from xml.parsers.expat import ExpatError
from plugin import PlugIn
from protocol import (NS_STREAMS, NS_XMPP_STREAMS, NS_HTTP_BIND, Iq, Presence,
@ -93,6 +94,24 @@ class XMPPDispatcher(PlugIn):
# Let the dispatcher know if there is support for stream management
self.sm = None
# \ufddo -> \ufdef range
c = u'\ufdd0'
r = c.encode('utf8')
while (c < u'\ufdef'):
c = unichr(ord(c) + 1)
r += '|' + c.encode('utf8')
# \ufffe-\uffff, \u1fffe-\u1ffff, ..., \u10fffe-\u10ffff
c = u'\ufffe'
r += '|' + c.encode('utf8')
r += '|' + unichr(ord(c) + 1).encode('utf8')
while (c < u'\U0010fffe'):
c = unichr(ord(c) + 0x10000)
r += '|' + c.encode('utf8')
r += '|' + unichr(ord(c) + 1).encode('utf8')
self.invalid_chars_re = re.compile(r)
def getAnID(self):
global outgoingID
outgoingID += 1
@ -193,6 +212,7 @@ class XMPPDispatcher(PlugIn):
# disconnect method will never be called.
# Is this intended?
# also look at transports start_disconnect()
data = re.sub(self.invalid_chars_re, u'\ufffd'.encode('utf-8'), data)
for handler in self._cycleHandlers:
handler(self)
if len(self._pendingExceptions) > 0: