gajim-plural/common/xmlstream.py

603 lines
20 KiB
Python
Raw Normal View History

2004-03-06 05:11:57 +01:00
## xmlstream.py
2003-10-22 20:45:13 +02:00
##
## Copyright (C) 2001 Matthew Allum
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU Lesser General Public License as published
## by the Free Software Foundation; either version 2, or (at your option)
## any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU Lesser General Public License for more details.
"""\
xmlstream.py provides simple functionality for implementing
XML stream based network protocols. It is used as a base
for jabber.py.
xmlstream.py manages the network connectivity and xml parsing
of the stream. When a complete 'protocol element' ( meaning a
complete child of the xmlstream's root ) is parsed the dispatch
method is called with a 'Node' instance of this structure.
The Node class is a very simple XML DOM like class for
manipulating XML documents or 'protocol elements' in this
case.
"""
2004-03-06 05:11:57 +01:00
# $Id$
2003-10-22 20:45:13 +02:00
import time, sys, re, socket
from select import select
from base64 import encodestring
2003-10-22 20:45:13 +02:00
import xml.parsers.expat
import debug
_debug=debug
2003-10-22 20:45:13 +02:00
2004-03-06 05:11:57 +01:00
# Library version string; Client.connect() appends it to the User-Agent
# header that is sent to an HTTP proxy.
VERSION = "0.5"
2003-10-22 20:45:13 +02:00
# Hand-rolled boolean constants (presumably kept for interpreters that
# predate the built-in names -- TODO confirm).
False = 0
True = 1
# Connection type selectors; Client.connect() and Client._setupComms()
# compare the 'connection' argument against these values.
TCP = 1
STDIO = 0
TCP_SSL = 2
ENCODING = 'utf-8' # Though it is uncommon, this is the only right setting.
# Conversion applied to attribute values when a Node is serialized.
ustr = str
2003-10-22 20:45:13 +02:00
BLOCK_SIZE = 1024 ## Number of bytes to get at a time via socket
## transactions
# Debug flags. The two below are re-exported from the debug module; the
# remaining ones are defined here and registered in debug.debug_flags.
DBG_INIT, DBG_ALWAYS = debug.DBG_INIT, debug.DBG_ALWAYS
DBG_CONN_ERROR = 'conn-error' ; debug.debug_flags.append( DBG_CONN_ERROR )
DBG_XML_PARSE = 'xml-parse' ; debug.debug_flags.append( DBG_XML_PARSE )
DBG_XML_RAW = 'xml-raw' ; debug.debug_flags.append( DBG_XML_RAW )
DBG_XML = [ DBG_XML_PARSE, DBG_XML_RAW ] # sample multiflag
2003-10-22 20:45:13 +02:00
def XMLescape(txt):
    """Escape XML entities.

    Returns a copy of txt in which '&', '<' and '>' are replaced by the
    entities '&amp;', '&lt;' and '&gt;'.
    """
    # The ampersand must be handled first, otherwise the ampersands
    # introduced by the two replacements below would be escaped again.
    txt = txt.replace("&", "&amp;")
    txt = txt.replace("<", "&lt;")
    txt = txt.replace(">", "&gt;")
    return txt
def XMLunescape(txt):
    """Unescape XML entities.

    Turns '&gt;', '&lt;' and '&amp;' back into '>', '<' and '&'.
    """
    # '&amp;' is deliberately the last pair: decoding it earlier would let
    # an escaped ampersand combine with the following text into a new entity.
    for entity, char in (("&gt;", ">"), ("&lt;", "<"), ("&amp;", "&")):
        txt = txt.replace(entity, char)
    return txt
class error(Exception):
    """Exception raised by this module.

    The constructor argument is converted with str() and kept in the
    'value' attribute; str() of the exception returns that text.
    Deriving from Exception lets callers catch it with a generic
    'except Exception' clause as well as with 'except error'.
    """
    def __init__(self, value):
        Exception.__init__(self, value)
        self.value = str(value)

    def __str__(self):
        return self.value
2004-03-06 05:11:57 +01:00
2003-10-22 20:45:13 +02:00
class Node:
    """A simple XML DOM like class.

    Instance attributes:
      name      -- tag name
      namespace -- namespace of the tag ('' when there is none)
      attrs     -- dictionary of attributes
      data      -- list of text chunks
      kids      -- list of child Node instances
      parent    -- parent Node or None

    When the node is serialized, text chunk number i is written in front
    of child number i; the remaining chunks follow the last child.
    """

    def __init__(self, tag=None, parent=None, attrs=None, payload=None, node=None):
        """Create a node.

        tag     -- tag name, optionally prefixed with a namespace and a
                   space ('namespace name').
        parent  -- parent node.
        attrs   -- mapping of attributes to set on the node.
        payload -- sequence of children; Node instances are inserted as
                   they are, anything else is parsed as raw XML.
        node    -- Node to take the initial state from, or XML text that
                   is parsed first.
        """
        if node:
            if not isinstance(node, Node):
                node = NodeBuilder(node).getDom()
            # The containers are shared with the source node, not copied.
            self.name, self.namespace, self.attrs, self.data, self.kids, self.parent = \
                node.name, node.namespace, node.attrs, node.data, node.kids, node.parent
        else:
            self.name, self.namespace, self.attrs, self.data, self.kids, self.parent = \
                'tag', '', {}, [], [], None
        if tag:
            # 'namespace name' or plain 'name'; the padding with '' yields
            # an empty namespace in the second case.
            self.namespace, self.name = ([''] + tag.split())[-2:]
        if parent:
            self.parent = parent
        # The namespace is not inherited from the parent; the original
        # author left unverified whether that would be necessary.
        if attrs:
            for attr in attrs.keys():
                self.attrs[attr] = attrs[attr]
        if payload:
            for item in payload:
                if isinstance(item, Node):
                    self.insertNode(item)
                else:
                    self.insertXML(item)

    def setParent(self, node):
        "Set the nodes parent node."
        self.parent = node

    def getParent(self):
        "Return the nodes parent node."
        return self.parent

    def getName(self):
        "Return the nodes tag name."
        return self.name

    def setName(self, val):
        "Set the nodes tag name."
        self.name = val

    def putAttr(self, key, val):
        "Add a name/value attribute to the node."
        self.attrs[key] = val

    def getAttr(self, key):
        "Get a value for the nodes named attribute, None if it is not set."
        try:
            return self.attrs[key]
        except (KeyError, TypeError):
            return None

    def putData(self, data):
        "Append a chunk of textual data to the node."
        self.data.append(data)

    def insertData(self, data):
        "Append a chunk of textual data to the node (same as putData)."
        self.data.append(data)

    def getData(self):
        "Return the nodes textual data as one string."
        return ''.join(self.data)

    def getDataAsParts(self):
        "Return the node data as the list of chunks."
        return self.data

    def getNamespace(self):
        "Return the nodes namespace."
        return self.namespace

    def setNamespace(self, namespace):
        "Set the nodes namespace."
        self.namespace = namespace

    def insertTag(self, name=None, attrs=None, payload=None, node=None):
        """Add a child tag of name 'name' to the node.

        Returns the newly created node.
        """
        newnode = Node(tag=name, parent=self, attrs=attrs, payload=payload, node=node)
        self.kids.append(newnode)
        return newnode

    def insertNode(self, node):
        "Add a child node to the node and return it."
        self.kids.append(node)
        return node

    def insertXML(self, xml_str):
        "Parse raw xml, add the result as a child of the node and return it."
        newnode = NodeBuilder(xml_str).getDom()
        self.kids.append(newnode)
        return newnode

    def __str__(self):
        return self._xmlnode2str()

    def _xmlnode2str(self, parent=None):
        """Return an xml ( string ) representation of the node
        and its children.

        The namespace declaration is written only when a parent is
        given and its namespace differs from the nodes own one.
        """
        s = "<" + self.name
        if self.namespace:
            if parent and parent.namespace != self.namespace:
                s = s + " xmlns = '%s' " % self.namespace
        for key in self.attrs.keys():
            val = ustr(self.attrs[key])
            # Values are wrapped in single quotes, so a quote inside the
            # value has to be escaped too to keep the output well-formed.
            s = s + " %s='%s'" % (key, XMLescape(val).replace("'", "&apos;"))
        s = s + ">"
        cnt = 0
        if self.kids != None:
            for kid in self.kids:
                if cnt < len(self.data):
                    s = s + XMLescape(self.data[cnt])
                s = s + kid._xmlnode2str(parent=self)
                cnt = cnt + 1
        # Write every chunk that is left so the output agrees with
        # getData() even when there are more chunks than children.
        for chunk in self.data[cnt:]:
            s = s + XMLescape(chunk)
        if not self.kids and s[-1:] == '>':
            # Nothing was written after the start tag: collapse it.
            s = s[:-1] + ' />'
        else:
            s = s + "</" + self.name + ">"
        return s

    def getTag(self, name, index=None):
        """Return a child node with tag name 'name'.

        With an index, that many matching children are skipped first.
        Returns None if not found.
        """
        for node in self.kids:
            if node.getName() == name:
                if not index:
                    return node
                index -= 1
        return None

    def getTags(self, name):
        """Like getTag but returns a list with all matching child nodes."""
        return [node for node in self.kids if node.getName() == name]

    def getChildren(self):
        """Return the list of the nodes children."""
        return self.kids

    def removeTag(self, tag):
        """Pop out the specified child and return it.

        'tag' is either a Node or a tag name; with a name the first
        matching child is removed. Returns None when nothing matched.
        """
        if isinstance(tag, Node):
            try:
                self.kids.remove(tag)
            except ValueError:
                return None
            return tag
        for node in self.kids:
            if node.getName() == tag:
                self.kids.remove(node)
                return node
        return None
2003-10-22 20:45:13 +02:00
class NodeBuilder:
    """Builds a 'minidom' from data parsed to it. Primarily for the
    insertXML method of Node.

    Elements at nesting level _dispatch_depth become the root of a new
    Node tree; when such an element is closed the tree is handed to
    dispatch(). Elements above that level are treated as the enclosing
    stream tag.
    """
    def __init__(self, data=None):
        """Create the expat parser; 'data', if given, is parsed at once
        as a complete document."""
        self._parser = xml.parsers.expat.ParserCreate(namespace_separator=' ')
        self._parser.StartElementHandler = self.unknown_starttag
        self._parser.EndElementHandler = self.unknown_endtag
        self._parser.CharacterDataHandler = self.handle_data
        self.__depth = 0
        self._dispatch_depth = 1
        # Node that currently receives children and text.
        self._ptr = None
        # True while consecutive character data callbacks must be merged
        # into one text chunk.
        self.last_is_data = False
        if data: self._parser.Parse(data, 1)

    def unknown_starttag(self, tag, attrs):
        """XML Parser callback"""
        self.__depth = self.__depth + 1
        self.DEBUG("DEPTH -> %i , tag -> %s, attrs -> %s" % \
            (self.__depth, tag, str(attrs)), DBG_XML_PARSE)
        if self.__depth == self._dispatch_depth:
            # Start of a new top-level element: begin a fresh tree.
            self._mini_dom = Node(tag=tag, attrs=attrs)
            self._ptr = self._mini_dom
        elif self.__depth > self._dispatch_depth:
            self._ptr.kids.append(Node(tag=tag, parent=self._ptr, attrs=attrs))
            self._ptr = self._ptr.kids[-1]
        else:  ## it is the stream tag:
            if 'id' in attrs:
                self._incomingID = attrs['id']
        self.last_is_data = False

    def unknown_endtag(self, tag):
        """XML Parser callback"""
        self.DEBUG("DEPTH -> %i" % self.__depth, DBG_XML_PARSE)
        if self.__depth == self._dispatch_depth:
            self.dispatch(self._mini_dom)
        elif self.__depth > self._dispatch_depth:
            self._ptr = self._ptr.parent
        else:
            self.DEBUG("*** Stream terminated ? ****", DBG_CONN_ERROR)
        self.__depth = self.__depth - 1
        self.last_is_data = False

    def handle_data(self, data):
        """XML Parser callback"""
        self.DEBUG("data-> " + data, DBG_XML_PARSE)
        if self.__depth < self._dispatch_depth:
            # Text directly inside the stream tag belongs to no element
            # being built: there is either no current node yet or only
            # one that has already been dispatched. Ignore it.
            return
        if self.last_is_data:
            # expat may deliver one run of text in several pieces.
            self._ptr.data[-1] += data
        else:
            self._ptr.data.append(data)
            self.last_is_data = True

    def dispatch(self, dom):
        """Called with every completed top-level Node. Does nothing
        here; meant to be overridden."""
        pass

    def DEBUG(self, dup1, dup2=None):
        """Debug output hook. Does nothing here."""
        pass

    def getDom(self):
        """Return the most recently started top-level Node."""
        return self._mini_dom
class Stream(NodeBuilder):
    """Extension of NodeBuilder class. Handles stream of XML stanzas.

    Calls the dispatch method for every child of the root node
    (stream:stream for a jabber stream).
    The attributes _read, _write and _reader must be set by an external
    entity before read(), write() or process() are used.
    """
    def __init__(self, namespace,
                 debug=[DBG_ALWAYS],
                 log=None,
                 id=None,
                 timestampLog=True):
        """Set up debugging and logging.

        namespace    -- stored as _namespace.
        debug        -- list of debug flags handed to debug.Debug.
        log          -- file name (opened for writing) or an object with
                        write() and flush(); None disables logging.
        id           -- outgoing stream id.
        timestampLog -- whether log lines are prefixed with the time.
        """
        self._namespace = namespace
        self._read, self._reader, self._write = None, None, None
        self._incomingID = None
        self._outgoingID = id
        self._debug = _debug.Debug(debug, encoding=ENCODING)
        self.DEBUG = self._debug.show  # makes it backwards compatible with v0.4 code
        self.DEBUG("stream init called", DBG_INIT)
        if log:
            if isinstance(log, str):
                try:
                    self._logFH = open(log, 'w')
                except (IOError, OSError):
                    print("ERROR: can't open %s for writing" % log)
                    # NOTE(review): exits with status 0 although this is
                    # an error path; kept as it was.
                    sys.exit(0)
            else:  ## assume its a stream type object
                self._logFH = log
        else:
            self._logFH = None
        self._timestampLog = timestampLog

    def connect(self):
        """Reset the parser state for a new stream. Stanzas are the
        children of the stream root, hence the dispatch depth of 2."""
        NodeBuilder.__init__(self)
        self._dispatch_depth = 2

    def timestampLog(self, timestamp):
        """ Enable or disable the showing of a timestamp in the log.
        By default, timestamping is enabled.
        """
        self._timestampLog = timestamp

    def read(self):
        """Reads incoming data. Blocks until done. Calls self.disconnected(self) if appropriate.

        Returns the received data; an empty string means disconnect.
        """
        # A failing first read is treated like a closed connection.
        try: received = self._read(BLOCK_SIZE)
        except Exception: received = ''
        # Drain whatever else is available without blocking.
        while select([self._reader], [], [], 0)[0]:
            add = self._read(BLOCK_SIZE)
            received += add
            if not add: break
        if len(received):  # length of 0 means disconnect
            self.DEBUG("got data " + received, DBG_XML_RAW)
            self.log(received, 'RECV:')
        else: self.disconnected(self)
        return received

    def write(self, raw_data):
        """Writes raw outgoing data. Blocks until done.
        If supplied data is not a unicode string, ENCODING
        is used for conversion. Avoid this!
        Always send your data as a unicode string.

        Any error while sending is reported through self.disconnected(self).
        """
        if isinstance(raw_data, str):
            self.DEBUG('Non-utf-8 string "%s" passed to Stream.write! Treating it as %s encoded.'%(raw_data,ENCODING))
            raw_data = unicode(raw_data, ENCODING)
        data_out = raw_data.encode('utf-8')
        try:
            self._write(data_out)
            self.log(data_out, 'SENT:')
            self.DEBUG("sent %s" % data_out, DBG_XML_RAW)
        except Exception:
            self.DEBUG("xmlstream write threw error", DBG_CONN_ERROR)
            self.disconnected(self)

    def process(self, timeout=0):
        """Receives incoming data (if any) and processes it.
        Waits for data no more than timeout seconds.

        Returns the number of characters received (0 after a disconnect)
        or the string '0' when nothing arrived within the timeout. The
        string is true in a boolean test, which keeps 'nothing received'
        apart from 'disconnected'.
        """
        if select([self._reader], [], [], timeout)[0]:
            data = self.read()
            self._parser.Parse(data)
            return len(data)
        return '0'  # Zero means that nothing received but link is alive.

    def disconnect(self):
        """Close the stream and socket.

        Uses self._sock, which this class does not set itself.
        """
        self.write(u"</stream:stream>")
        # Keep processing until process() reports the disconnect.
        while self.process(): pass
        self._sock.close()
        self._sock = None

    def disconnected(self, conn):
        """Called when a Network Error or disconnection occurs."""
        # Handlers that take no argument are supported as well.
        try: self.disconnectHandler(conn)
        except TypeError: self.disconnectHandler()

    def disconnectHandler(self, conn):  ## To be overidden ##
        """Called when a Network Error or disconnection occurs.
        Designed to be overidden"""
        raise error("Standart disconnectionHandler called. Replace it with appropriate for your client.")

    def log(self, data, inout=''):
        """Logs data to the specified filehandle. Data is time stamped
        (unless timestamping was disabled) and prefixed with inout"""
        if self._logFH is not None:
            if self._timestampLog:
                self._logFH.write("%s - %s - %s\n" % (time.asctime(), inout, data))
            else:
                self._logFH.write("%s - %s\n" % (inout, data))
            self._logFH.flush()

    def getIncomingID(self):
        """Returns the id found on the incoming stream tag"""
        return self._incomingID

    def getOutgoingID(self):
        """Returns the id given to the constructor for the outgoing stream"""
        return self._outgoingID
class Client(Stream):
    """Stream that sets up its own transport: a TCP socket, a TCP
    socket wrapped in SSL, or stdin/stdout, optionally through an HTTP
    proxy."""
    def __init__(self, host, port, namespace,
                 debug=[DBG_ALWAYS],
                 log=None,
                 sock=None,
                 id=None,
                 connection=TCP,
                 hostIP=None,
                 proxy=None):
        """Store the connection parameters; nothing is connected yet.

        host, port -- server to talk to.
        namespace, debug, log, id -- passed on to Stream.
        sock       -- existing socket object, if any.
        connection -- one of TCP, TCP_SSL or STDIO.
        hostIP     -- address to connect to; defaults to host.
        proxy      -- dictionary with 'host' and 'port' and optionally
                      'type' ('CONNECT' or 'PUT'), 'user' and 'password'.
        """
        Stream.__init__(self, namespace, debug, log, id)
        self._host = host
        self._port = port
        self._sock = sock
        self._connection = connection
        if hostIP: self._hostIP = hostIP
        else: self._hostIP = host
        self._proxy = proxy
        self._sslObj = None
        self._sslIssuer = None
        self._sslServer = None

    def getSocket(self):
        """Return the underlying socket (None when there is none)."""
        return self._sock

    def connect(self):
        """Attempt to connect to specified host.

        Socket errors are logged and re-raised; error is raised for an
        unknown proxy type or an unusable proxy reply. Except for STDIO
        connections, self.header() is called at the end; it is not
        defined in this class (presumably provided by a subclass --
        TODO confirm).
        """
        self.DEBUG("client connect called to %s %s type %i" % (self._host,
                                                               self._port,
                                                               self._connection), DBG_INIT)
        Stream.connect(self)
        ## TODO: check below that stdin/stdout are actually open
        if self._connection == STDIO:
            self._setupComms()
            return
        self._sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            if self._proxy: self._sock.connect((self._proxy['host'], self._proxy['port']))
            else: self._sock.connect((self._hostIP, self._port))
        except socket.error:
            # sys.exc_info() avoids the version-specific syntax for
            # binding the exception.
            self.DEBUG("socket error: " + str(sys.exc_info()[1]), DBG_CONN_ERROR)
            raise
        if self._connection == TCP_SSL:
            try:
                self.DEBUG("Attempting to create ssl socket", DBG_INIT)
                self._sslObj = socket.ssl(self._sock, None, None)
                self._sslIssuer = self._sslObj.issuer()
                self._sslServer = self._sslObj.server()
            except Exception:
                self.DEBUG("Socket Error: No SSL Support", DBG_CONN_ERROR)
                raise
        self._setupComms()
        if self._proxy:
            self.DEBUG("Proxy connected", DBG_INIT)
            if 'type' in self._proxy: proxy_type = self._proxy['type'].upper()
            else: proxy_type = 'CONNECT'
            connector = []
            if proxy_type == 'CONNECT':
                connector.append(u'CONNECT %s:%s HTTP/1.0'%(self._hostIP,self._port))
            elif proxy_type == 'PUT':
                connector.append(u'PUT http://%s:%s/ HTTP/1.0'%(self._hostIP,self._port))
            else:
                self.DEBUG("Proxy Error: unknown proxy type", DBG_CONN_ERROR)
                raise error('Unknown proxy type: ' + proxy_type)
            connector.append('Proxy-Connection: Keep-Alive')
            connector.append('Pragma: no-cache')
            connector.append('Host: %s:%s'%(self._hostIP,self._port))
            connector.append('User-Agent: Jabberpy/' + VERSION)
            if 'user' in self._proxy and 'password' in self._proxy:
                credentials = '%s:%s'%(self._proxy['user'],self._proxy['password'])
                credentials = encodestring(credentials).strip()
                connector.append('Proxy-Authorization: Basic ' + credentials)
            # Joined with '\r\n' this last item yields the blank line
            # that ends the request headers.
            connector.append('\r\n')
            # The transport callables are saved and put back around the
            # handshake.
            bak = self._read, self._write
            self.write('\r\n'.join(connector))
            reply = self.read().replace('\r', '')
            self._read, self._write = bak
            try: proto, code, desc = reply.split('\n')[0].split(' ', 2)
            except ValueError: raise error('Invalid proxy reply')
            if code != '200': raise error('Invalid proxy reply: %s %s %s'%(proto,code,desc))
            # Consume the rest of the proxy's reply headers.
            while reply.find('\n\n') == -1:
                chunk = self.read()
                if not chunk:
                    # An empty read means the peer is gone; without this
                    # check the loop would never end.
                    raise error('Invalid proxy reply: connection closed')
                reply += chunk.replace('\r', '')
            self.DEBUG("Jabber server connected", DBG_INIT)
        self.header()

    def _setupComms(self):
        """Bind _read, _write and _reader to the transport selected by
        the connection type. Raises IOError for an unknown type."""
        if self._connection == TCP:
            self._read = self._sock.recv
            self._write = self._sock.sendall
            self._reader = self._sock
        elif self._connection == TCP_SSL:
            self._read = self._sslObj.read
            self._write = self._sslObj.write
            # select() still has to watch the plain socket.
            self._reader = self._sock
        elif self._connection == STDIO:
            self._read = sys.stdin.read
            self._write = sys.stdout.write
            self._reader = sys.stdin
        else:
            self.DEBUG('unknown connection type', DBG_CONN_ERROR)
            raise IOError('unknown connection type')
2003-10-22 20:45:13 +02:00
2004-03-06 05:11:57 +01:00
class Server:
    """Sample select() based server: accepts clients on port 5222 and
    echoes back whatever they send."""

    def now(self):
        """Return the current time as a string."""
        return time.ctime(time.time())

    def __init__(self, maxclients=10):
        """Create the listening socket, bound to port 5222 on all
        interfaces. 'maxclients' is the listen backlog."""
        self.host = ''
        self.port = 5222
        self.streams = []
        # make main sockets for accepting new client requests
        self.mainsocks, self.readsocks, self.writesocks = [], [], []
        self.portsock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.portsock.bind((self.host, self.port))
        self.portsock.listen(maxclients)
        self.mainsocks.append(self.portsock)  # add to main list to identify
        self.readsocks.append(self.portsock)  # add to select inputs list

    # event loop: listen and multiplex until server process killed
    def serve(self):
        """Run the accept/echo loop. Never returns."""
        print('select-server loop starting')
        while 1:
            print("LOOPING")
            readables, writeables, exceptions = select(self.readsocks,
                                                       self.writesocks, [])
            for sockobj in readables:
                if sockobj in self.mainsocks:  # for ready input sockets
                    newsock, address = sockobj.accept()  # accept should not block
                    print('Connect: %s %s' % (address, id(newsock)))
                    # add to select list, wait
                    self.readsocks.append(newsock)
                    self._makeNewStream(newsock)
                else:
                    # client socket: read next chunk; recv should not block
                    data = sockobj.recv(1024)
                    print('\tgot %s on %s' % (data, id(sockobj)))
                    if not data:  # if closed by the client
                        sockobj.close()  # close here and remove from the select list
                        self.readsocks.remove(sockobj)
                    else:
                        # this may block: should really select for writes too
                        sockobj.send('Echo=>%s' % data)

    def _makeNewStream(self, sckt):
        """Wrap an accepted socket in a stream object, remember it and
        return it."""
        # Client is the class that accepts host, port and a 'sock'
        # keyword and offers getSocket(), which the helpers below use.
        new_stream = Client('localhost', 5222,
                            'jabber:client',
                            sock=sckt)
        self.streams.append(new_stream)
        ## maybe overide for a 'server stream'
        # NOTE(review): header() is not defined on Client in this file;
        # presumably a subclass has to supply it -- confirm.
        new_stream.header()
        return new_stream

    def _getStreamSockets(self):
        """Return the sockets of all known streams."""
        return [s.getSocket() for s in self.streams]

    def _getStreamFromSocket(self, sock):
        """Return the stream that uses 'sock', or None."""
        for s in self.streams:
            if s.getSocket() == sock:
                return s
        return None