diff options
author | Yann Leboulanger <asterix@lagaule.org> | 2012-05-08 16:19:11 +0400 |
---|---|---|
committer | Yann Leboulanger <asterix@lagaule.org> | 2012-05-08 16:19:11 +0400 |
commit | 97ac572c20083fe0283c4621dc6e54603a32a310 (patch) | |
tree | 42c7b0d6121e71538d804136a8929b7b25adb96c /nbxmpp/simplexml.py |
initial revision nbxmpp-0.1
Diffstat (limited to 'nbxmpp/simplexml.py')
-rw-r--r-- | nbxmpp/simplexml.py | 693 |
1 files changed, 693 insertions, 0 deletions
# Reconstructed from: diff --git a/nbxmpp/simplexml.py b/nbxmpp/simplexml.py
# (new file mode 100644, index 0000000..ad907be)
##   simplexml.py based on Matthew Allum's xmlstream.py
##
##   Copyright (C) 2003-2005 Alexey "Snake" Nezhdanov
##
##   This program is free software; you can redistribute it and/or modify
##   it under the terms of the GNU General Public License as published by
##   the Free Software Foundation; either version 2, or (at your option)
##   any later version.
##
##   This program is distributed in the hope that it will be useful,
##   but WITHOUT ANY WARRANTY; without even the implied warranty of
##   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##   GNU General Public License for more details.

# $Id: simplexml.py,v 1.27 2005/04/30 07:20:27 snakeru Exp $

"""
Simplexml module provides xmpppy library with all needed tools to handle XML
nodes and XML streams. I'm personally using it in many other separate
projects. It is designed to be as standalone as possible
"""

import xml.parsers.expat
import logging
log = logging.getLogger('nbxmpp.simplexml')

try:
    unicode
except NameError:
    # Python 3: "str" is already the unicode text type and there is no
    # separate "basestring"; alias both so the code below runs unchanged.
    unicode = str
    basestring = str


def XMLescape(txt):
    """
    Return provided string with symbols & < > " replaced by their respective
    XML entities. FORM FEED (0x0C) and ESC (0x1B) are removed.
    """
    # "&" must be escaped first, otherwise the "&" introduced by the other
    # entities would be escaped a second time.
    # FORM FEED and ESC are dropped because they are not valid XML chars.
    return (txt.replace("&", "&amp;")
               .replace("<", "&lt;")
               .replace(">", "&gt;")
               .replace('"', "&quot;")
               .replace(u'\x0C', "")
               .replace(u'\x1B', ""))

ENCODING='utf-8'

def ustr(what):
    """
    Convert object "what" to a unicode string.

    Unicode input is returned unchanged. Otherwise the object's __str__
    method is called (falling back to str() if it has none) and a non-unicode
    result is decoded using ENCODING.
    """
    if isinstance(what, unicode):
        return what
    try:
        r = what.__str__()
    except AttributeError:
        r = str(what)
    if not isinstance(r, unicode):
        return unicode(r, ENCODING)
    return r

class Node(object):
    """
    Node class describes syntax of separate XML Node. It has a constructor
    that permits node creation from a "namespace name" tag, attributes and
    a payload of text strings and other nodes. It does not natively support
    building a node from a text string and uses the NodeBuilder class for that
    purpose. After creation the node can be mangled in many ways so it can be
    completely changed. Also the node can be serialised into a string in one
    of two modes: default (where the textual representation of the node
    describes it exactly) and "fancy" - with whitespace added to make
    indentation and thus make the result more readable by a human.

    Node class has the attribute FORCE_NODE_RECREATION that defaults to 0,
    thus enabling fast node replication from some other node. When it is set,
    a Node passed as "node" to the constructor is first serialised to text
    and re-parsed instead of being copied field by field. In the fast way the
    attribute/data/kids/nsd containers are new objects, but the child nodes
    and the parent they refer to are shared with the original node.
    """

    FORCE_NODE_RECREATION = 0

    def __init__(self, tag=None, attrs={}, payload=[], parent=None, nsp=None,
                    node_built=False, node=None):
        """
        Takes "tag" argument as the name of node (prepended by namespace, if
        needed and separated from it by a space), attrs dictionary as the set
        of arguments, payload list as the set of textual strings and child
        nodes that this node carries within itself and "parent" argument that
        is another node that this one will be the child of. Also the __init__
        can be provided with "node" argument that is either a text string
        containing exactly one node or another Node instance to begin with. If
        both "node" and other arguments are provided then the node is
        initially created as a replica of "node" and then modified to be
        compliant with the other arguments.

        "nsp" is an optional prefix -> namespace mapping copied into the
        node's prefix cache. "node_built" tells that "tag" is in the
        "prefix:name" form produced by the parser, so the namespace is
        resolved via lookup_nsp() instead of being split off the tag.

        The mutable defaults for "attrs" and "payload" are only read here,
        never modified.
        """
        if node:
            if self.FORCE_NODE_RECREATION and isinstance(node, Node):
                node = str(node)
            if not isinstance(node, Node):
                # NodeBuilder re-initialises *this* instance in place from
                # the parsed text.
                node = NodeBuilder(node, self)
                node_built = True
            else:
                self.name, self.namespace, self.attrs, self.data, self.kids, self.parent, self.nsd = node.name, node.namespace, {}, [], [], node.parent, {}
                for key in node.attrs.keys():
                    self.attrs[key] = node.attrs[key]
                for data in node.data:
                    self.data.append(data)
                for kid in node.kids:
                    self.kids.append(kid)
                for k, v in node.nsd.items():
                    self.nsd[k] = v
        else:
            self.name, self.namespace, self.attrs, self.data, self.kids, self.parent, self.nsd = 'tag', '', {}, [], [], None, {}
        if parent:
            self.parent = parent
        self.nsp_cache = {}
        if nsp:
            for k, v in nsp.items(): self.nsp_cache[k] = v
        for attr, val in attrs.items():
            # Namespace declarations are recorded in nsd as well as being
            # kept as ordinary attributes.
            if attr == 'xmlns':
                self.nsd[u''] = val
            elif attr.startswith('xmlns:'):
                self.nsd[attr[6:]] = val
            self.attrs[attr]=attrs[attr]
        if tag:
            if node_built:
                # "prefix:name" -> (prefix, name); no colon gives prefix ''.
                pfx, self.name = (['']+tag.split(':'))[-2:]
                self.namespace = self.lookup_nsp(pfx)
            else:
                if ' ' in tag:
                    self.namespace, self.name = tag.split()
                else:
                    self.name = tag
        if isinstance(payload, basestring): payload=[payload]
        for i in payload:
            if isinstance(i, Node):
                self.addChild(node=i)
            else:
                self.data.append(ustr(i))

    def lookup_nsp(self, pfx=''):
        """
        Resolve namespace prefix "pfx" to a namespace URI.

        Looks in this node's own declarations, then in its prefix cache, then
        asks the parent (caching the answer). A parentless node that cannot
        resolve the prefix returns 'http://www.gajim.org/xmlns/undeclared'.
        """
        ns = self.nsd.get(pfx, None)
        if ns is None:
            ns = self.nsp_cache.get(pfx, None)
        if ns is None:
            if self.parent:
                ns = self.parent.lookup_nsp(pfx)
                self.nsp_cache[pfx] = ns
            else:
                return 'http://www.gajim.org/xmlns/undeclared'
        return ns

    def __str__(self, fancy=0):
        """
        Method used to dump node into textual representation. If "fancy"
        argument is non-zero, produces indented output for readability;
        "fancy" is then the nesting level (1 for the outermost node) and the
        opening tag is indented by (fancy-1)*2 spaces.
        """
        s = (fancy-1) * 2 * ' ' + "<" + self.name
        if self.namespace:
            # Emit xmlns only when it differs from the parent's namespace and
            # is not already present as an explicit attribute.
            if not self.parent or self.parent.namespace!=self.namespace:
                if 'xmlns' not in self.attrs:
                    s = s + ' xmlns="%s"'%self.namespace
        for key in self.attrs.keys():
            val = ustr(self.attrs[key])
            s = s + ' %s="%s"' % ( key, XMLescape(val) )
        s = s + ">"
        cnt = 0
        if self.kids:
            if fancy: s = s + "\n"
            for a in self.kids:
                # data[cnt] is the text chunk preceding kid number cnt
                if not fancy and (len(self.data)-1)>=cnt: s=s+XMLescape(self.data[cnt])
                elif (len(self.data)-1)>=cnt: s=s+XMLescape(self.data[cnt].strip())
                if isinstance(a, str) or isinstance(a, unicode):
                    # setPayload() may have stored plain strings among kids
                    s = s + a.__str__()
                else:
                    s = s + a.__str__(fancy and fancy+1)
                cnt=cnt+1
        # text chunk following the last kid (or the only chunk if no kids)
        if not fancy and (len(self.data)-1) >= cnt: s = s + XMLescape(self.data[cnt])
        elif (len(self.data)-1) >= cnt: s = s + XMLescape(self.data[cnt].strip())
        if not self.kids and s.endswith('>'):
            # nothing was appended after the opening tag: self-closing form
            s=s[:-1]+' />'
            if fancy: s = s + "\n"
        else:
            if fancy and not self.data: s = s + (fancy-1) * 2 * ' '
            s = s + "</" + self.name + ">"
            if fancy: s = s + "\n"
        return s

    def addChild(self, name=None, attrs={}, payload=[], namespace=None, node=None):
        """
        If "node" argument is provided, adds it as child node (re-parenting it
        to this node). Else creates new node from the other arguments' values
        and adds it as well. Returns the added node.

        Raises AttributeError if "attrs" contains an 'xmlns' key.
        """
        if 'xmlns' in attrs:
            raise AttributeError("Use namespace=x instead of attrs={'xmlns':x}")
        if node:
            newnode=node
            node.parent = self
        else: newnode=Node(tag=name, parent=self, attrs=attrs, payload=payload)
        if namespace:
            newnode.setNamespace(namespace)
        self.kids.append(newnode)
        return newnode

    def addData(self, data):
        """
        Add some CDATA to node
        """
        self.data.append(ustr(data))

    def clearData(self):
        """
        Remove all CDATA from the node
        """
        self.data = []

    def delAttr(self, key):
        """
        Delete an attribute "key". Raises KeyError if it is not present.
        """
        del self.attrs[key]

    def delChild(self, node, attrs={}):
        """
        Delete the "node" from the node's children list, if "node" is an
        instance. Else delete the first node that has the specified name and
        (optionally) attributes. Returns the removed node.
        """
        if not isinstance(node, Node):
            node = self.getTag(node, attrs)
        self.kids.remove(node)
        return node

    def getAttrs(self):
        """
        Return all node's attributes as dictionary (the live dict, not a copy)
        """
        return self.attrs

    def getAttr(self, key):
        """
        Return value of specified attribute, or None if it is not set
        """
        return self.attrs.get(key)

    def getChildren(self):
        """
        Return all node's child nodes as list (the live list, not a copy)
        """
        return self.kids

    def getData(self):
        """
        Return all node CDATA as string (concatenated)
        """
        return ''.join(self.data)

    def getName(self):
        """
        Return the name of node
        """
        return self.name

    def getNamespace(self):
        """
        Return the namespace of node
        """
        return self.namespace

    def getParent(self):
        """
        Returns the parent of node (if present)
        """
        return self.parent

    def getPayload(self):
        """
        Return the payload of node i.e. list of child nodes and CDATA entries.
        F.e. for "<node>text1<nodea/><nodeb/> text2</node>" will be returned
        list: ['text1', <nodea instance>, <nodeb instance>, ' text2']

        Empty CDATA entries are skipped.
        """
        ret = []
        # Interleave data[i] and kids[i]; either list may run out first.
        for i in range(len(self.kids)+len(self.data)+1):
            try:
                if self.data[i]:
                    ret.append(self.data[i])
            except IndexError:
                pass
            try:
                ret.append(self.kids[i])
            except IndexError:
                pass
        return ret

    def getTag(self, name, attrs={}, namespace=None):
        """
        Filter all child nodes using specified arguments as filter. Return the
        first found or None if not found
        """
        return self.getTags(name, attrs, namespace, one=1)

    def getTagAttr(self, tag, attr):
        """
        Return attribute value of the child with specified name (or None if no
        such child or no such attribute)
        """
        try:
            return self.getTag(tag).attrs[attr]
        except Exception:
            # Best effort, but do not swallow KeyboardInterrupt/SystemExit
            # the way a bare "except:" would.
            return None

    def getTagData(self, tag):
        """
        Return concatenated CDATA of the child with specified name (or None
        if it cannot be obtained)
        """
        try:
            return self.getTag(tag).getData()
        except Exception:
            return None

    def getTags(self, name, attrs={}, namespace=None, one=0):
        """
        Filter all child nodes using specified arguments as filter. Returns
        the list of nodes found. If "one" is true, returns the first matching
        node instead, or None if there is none.
        """
        nodes = []
        for node in self.kids:
            if namespace and namespace != node.getNamespace():
                continue
            if node.getName() == name:
                for key in attrs.keys():
                    if key not in node.attrs or node.attrs[key]!=attrs[key]:
                        break
                else:
                    # loop finished without break: every attribute matched
                    nodes.append(node)
            if one and nodes:
                return nodes[0]
        if not one:
            return nodes

    def iterTags(self, name, attrs={}, namespace=None):
        """
        Iterate over all children using specified arguments as filter
        """
        for node in self.kids:
            if namespace is not None and namespace != node.getNamespace():
                continue
            if node.getName() == name:
                for key in attrs.keys():
                    if key not in node.attrs or \
                            node.attrs[key]!=attrs[key]:
                        break
                else:
                    yield node

    def setAttr(self, key, val):
        """
        Set attribute "key" with the value "val"
        """
        self.attrs[key] = val

    def setData(self, data):
        """
        Set node's CDATA to provided string. Resets all previous CDATA!
        """
        self.data = [ustr(data)]

    def setName(self, val):
        """
        Change the node name
        """
        self.name = val

    def setNamespace(self, namespace):
        """
        Changes the node namespace
        """
        self.namespace = namespace

    def setParent(self, node):
        """
        Set node's parent to "node". WARNING: does not check if the parent is
        already present and does not remove the node from the list of
        children of the previous parent
        """
        self.parent = node

    def setPayload(self, payload, add=0):
        """
        Set node's children list to "payload" (a single string is wrapped in
        a list). If "add" is true the items are appended to the existing
        children instead of replacing them. Items are stored as given, without
        conversion and without re-parenting. If you wish just to add a child
        or CDATA - use addData or addChild methods
        """
        if isinstance(payload, basestring):
            payload = [payload]
        if add:
            self.kids += payload
        else:
            self.kids = payload

    def setTag(self, name, attrs={}, namespace=None):
        """
        Same as getTag but if the node with specified namespace/attributes not
        found, creates such node and returns it
        """
        node = self.getTags(name, attrs, namespace=namespace, one=1)
        if node:
            return node
        else:
            return self.addChild(name, attrs, namespace=namespace)

    def setTagAttr(self, tag, attr, val):
        """
        Create new node (if not already present) with name "tag" and set its
        attribute "attr" to value "val"
        """
        try:
            self.getTag(tag).attrs[attr] = val
        except Exception:
            self.addChild(tag, attrs={attr: val})

    def setTagData(self, tag, val, attrs={}):
        """
        Creates new node (if not already present) with name "tag" and
        (optionally) attributes "attrs" and sets its CDATA to string "val"
        """
        try:
            self.getTag(tag, attrs).setData(ustr(val))
        except Exception:
            self.addChild(tag, attrs, payload = [ustr(val)])

    def has_attr(self, key):
        """
        Check if node has attribute "key"
        """
        return key in self.attrs

    def __getitem__(self, item):
        """
        Return node's attribute "item" value
        """
        return self.getAttr(item)

    def __setitem__(self, item, val):
        """
        Set node's attribute "item" value
        """
        return self.setAttr(item, val)

    def __delitem__(self, item):
        """
        Delete node's attribute "item"
        """
        return self.delAttr(item)

    def __contains__(self, item):
        """
        Check if node has attribute "item"
        """
        return self.has_attr(item)

    def __getattr__(self, attr):
        """
        Reduce memory usage caused by T/NT classes - use memory only when
        needed: the helper objects are created on first access and then
        stored on the instance. Any other missing attribute raises
        AttributeError.
        """
        if attr == 'T':
            self.T = T(self)
            return self.T
        if attr == 'NT':
            self.NT = NT(self)
            return self.NT
        raise AttributeError(attr)

class T:
    """
    Auxiliary class used to quick access to node's child nodes
    """

    def __init__(self, node):
        # Write through __dict__ to bypass our own __setattr__.
        self.__dict__['node'] = node

    def __getattr__(self, attr):
        return self.node.setTag(attr)

    def __setattr__(self, attr, val):
        if isinstance(val, Node):
            # Re-initialise the (possibly new) child as a replica of "val".
            Node.__init__(self.node.setTag(attr), node=val)
        else:
            return self.node.setTagData(attr, val)

    def __delattr__(self, attr):
        return self.node.delChild(attr)

class NT(T):
    """
    Auxiliary class used to quick create node's child nodes
    """

    def __getattr__(self, attr):
        return self.node.addChild(attr)

    def __setattr__(self, attr, val):
        if isinstance(val, Node):
            self.node.addChild(attr, node=val)
        else:
            return self.node.addChild(attr, payload=[val])

class NodeBuilder:
    """
    Builds a Node class minidom from data parsed to it. This class used for
    two purposes:

      1. Creation an XML Node from a textual representation. F.e. reading a
         config file. See an XML2Node method.
      2. Handling an incoming XML stream. This is done by mangling the
         _dispatch_depth parameter and redefining the dispatch method.

    You do not need to use this class directly if you are not designing your
    own XML handler
    """

    def __init__(self, data=None, initial_node=None):
        """
        Take two optional parameters: "data" and "initial_node"

        By default class initialised with empty Node class instance. Though,
        if "initial_node" is provided it used as "starting point". You can
        think about it as of "node upgrade". "data" (if provided) is fed to
        the parser as a complete document immediately after instance init.
        """
        log.debug("Preparing to handle incoming XML stream.")
        self._parser = xml.parsers.expat.ParserCreate()
        self._parser.StartElementHandler = self.starttag
        self._parser.EndElementHandler = self.endtag
        self._parser.StartNamespaceDeclHandler = self.handle_namespace_start
        self._parser.CharacterDataHandler = self.handle_cdata
        self._parser.buffer_text = True
        self.Parse = self._parser.Parse

        self.__depth = 0
        self.__last_depth = 0
        self.__max_depth = 0
        self._dispatch_depth = 1
        self._document_attrs = None
        self._document_nsp = None
        self._mini_dom=initial_node
        self.last_is_data = 1
        self._ptr=None
        self.data_buffer = None
        self.streamError = ''
        if data:
            # second argument is expat's "isfinal" flag
            self._parser.Parse(data, 1)

    def check_data_buffer(self):
        """
        Flush buffered character data (if any) into the current node
        """
        if self.data_buffer:
            self._ptr.data.append(''.join(self.data_buffer))
            del self.data_buffer[:]
            self.data_buffer = None

    def destroy(self):
        """
        Method used to allow class instance to be garbage-collected
        """
        self.check_data_buffer()
        self._parser.StartElementHandler = None
        self._parser.EndElementHandler = None
        self._parser.CharacterDataHandler = None
        self._parser.StartNamespaceDeclHandler = None

    def starttag(self, tag, attrs):
        """
        XML Parser callback. Used internally
        """
        self.check_data_buffer()
        self._inc_depth()
        log.info("STARTTAG.. DEPTH -> %i , tag -> %s, attrs -> %s",
                self.__depth, tag, repr(attrs))
        if self.__depth == self._dispatch_depth:
            if not self._mini_dom :
                self._mini_dom = Node(tag=tag, attrs=attrs, nsp = self._document_nsp, node_built=True)
            else:
                # reuse the existing object, re-initialising it in place
                Node.__init__(self._mini_dom, tag=tag, attrs=attrs, nsp = self._document_nsp, node_built=True)
            self._ptr = self._mini_dom
        elif self.__depth > self._dispatch_depth:
            self._ptr.kids.append(Node(tag=tag, parent=self._ptr, attrs=attrs, node_built=True))
            self._ptr = self._ptr.kids[-1]
        if self.__depth == 1:
            # Document root: remember its attributes and namespace
            # declarations, then announce the stream header.
            self._document_attrs = {}
            self._document_nsp = {}
            nsp, name = (['']+tag.split(':'))[-2:]
            for attr, val in attrs.items():
                if attr == 'xmlns':
                    self._document_nsp[u''] = val
                elif attr.startswith('xmlns:'):
                    self._document_nsp[attr[6:]] = val
                else:
                    self._document_attrs[attr] = val
            ns = self._document_nsp.get(nsp, 'http://www.gajim.org/xmlns/undeclared-root')
            try:
                self.stream_header_received(ns, name, attrs)
            except ValueError as e:
                self._document_attrs = None
                raise ValueError(str(e))
        if not self.last_is_data and self._ptr.parent:
            # Two tags in a row: insert an empty text chunk so that data[i]
            # keeps lining up with kids[i] in the parent.
            self._ptr.parent.data.append('')
        self.last_is_data = 0

    def endtag(self, tag ):
        """
        XML Parser callback. Used internally
        """
        log.info("DEPTH -> %i , tag -> %s", self.__depth, tag)
        self.check_data_buffer()
        if self.__depth == self._dispatch_depth:
            if self._mini_dom.getName() == 'error':
                children = self._mini_dom.getChildren()
                if children:
                    self.streamError = children[0].getName()
                else:
                    self.streamError = self._mini_dom.getData()
            self.dispatch(self._mini_dom)
        elif self.__depth > self._dispatch_depth:
            self._ptr = self._ptr.parent
        else:
            log.info("Got higher than dispatch level. Stream terminated?")
        self._dec_depth()
        self.last_is_data = 0
        if self.__depth == 0: self.stream_footer_received()

    def handle_cdata(self, data):
        """
        XML Parser callback. Used internally. Collects character data into
        data_buffer until the next tag flushes it.
        """
        if self.last_is_data:
            if self.data_buffer:
                self.data_buffer.append(data)
        elif self._ptr:
            self.data_buffer = [data]
            self.last_is_data = 1

    def handle_namespace_start(self, prefix, uri):
        """
        XML Parser callback. Used internally
        """
        self.check_data_buffer()

    def getDom(self):
        """
        Return just built Node
        """
        self.check_data_buffer()
        return self._mini_dom

    def dispatch(self, stanza):
        """
        Get called when the NodeBuilder reaches some level of depth on its way
        up with the built node as argument. Can be redefined to convert
        incoming XML stanzas to program events
        """
        pass

    def stream_header_received(self, ns, tag, attrs):
        """
        Method called when stream just opened
        """
        self.check_data_buffer()

    def stream_footer_received(self):
        """
        Method called when stream just closed
        """
        self.check_data_buffer()

    def has_received_endtag(self, level=0):
        """
        Return True if at least one end tag was seen (at level)
        """
        return self.__depth <= level and self.__max_depth > level

    def _inc_depth(self):
        self.__last_depth = self.__depth
        self.__depth += 1
        self.__max_depth = max(self.__depth, self.__max_depth)

    def _dec_depth(self):
        self.__last_depth = self.__depth
        self.__depth -= 1

def XML2Node(xml):
    """
    Convert supplied textual string into XML node. Handy f.e. for reading
    configuration file. Raises xml.parsers.expat.ExpatError if provided string
    is not well-formed XML
    """
    return NodeBuilder(xml).getDom()

def BadXML2Node(xml):
    """
    Convert supplied textual string into XML node.

    Intended for XML data that is cut half way round, i.e.
    "<html>some text <br>some more text".

    NOTE(review): the implementation is identical to XML2Node, and
    NodeBuilder feeds the data to expat as a complete (final) document, so
    truncated input is not treated any differently here - confirm whether
    callers rely on the tolerant behaviour described above.
    """
    return NodeBuilder(xml).getDom()