Package nbxmpp :: Module simplexml
[hide private]
[frames | no frames]

Source Code for Module nbxmpp.simplexml

  1  ##   simplexml.py based on Matthew Allum's xmlstream.py 
  2  ## 
  3  ##   Copyright (C) 2003-2005 Alexey "Snake" Nezhdanov 
  4  ## 
  5  ##   This program is free software; you can redistribute it and/or modify 
  6  ##   it under the terms of the GNU General Public License as published by 
  7  ##   the Free Software Foundation; either version 2, or (at your option) 
  8  ##   any later version. 
  9  ## 
 10  ##   This program is distributed in the hope that it will be useful, 
 11  ##   but WITHOUT ANY WARRANTY; without even the implied warranty of 
 12  ##   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 13  ##   GNU General Public License for more details. 
 14   
 15  # $Id: simplexml.py,v 1.27 2005/04/30 07:20:27 snakeru Exp $ 
 16   
 17  """ 
 18  Simplexml module provides xmpppy library with all needed tools to handle XML 
 19  nodes and XML streams. I'm personally using it in many other separate 
 20  projects. It is designed to be as standalone as possible 
 21  """ 
 22   
 23  import xml.parsers.expat 
 24  import logging 
 25  log = logging.getLogger('nbxmpp.simplexml') 
 26   
def XMLescape(txt):
    """
    Return "txt" with the symbols & < > " replaced by their respective XML
    entities and with the FORM FEED and ESC characters removed
    """
    substitutions = (
        # '&' goes first so the ampersands of the entities inserted by the
        # later substitutions are not escaped a second time
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
        ('"', "&quot;"),
        # FORM FEED and ESC are dropped, because they are not valid XML chars
        (u'\x0C', ""),
        (u'\x1B', ""),
    )
    escaped = txt
    for needle, replacement in substitutions:
        escaped = escaped.replace(needle, replacement)
    return escaped
# Codec used by ustr() to decode byte strings into unicode
ENCODING = 'utf-8'
def ustr(what):
    """
    Convert object "what" to a unicode string. Unicode objects are returned
    unchanged; anything else is rendered through its own __str__ method (or
    the str() builtin when it has none) and the result, if it is not unicode
    already, is decoded using ENCODING
    """
    if isinstance(what, unicode):
        return what
    try:
        text = what.__str__()
    except AttributeError:
        # No usable __str__ on the object: fall back to the builtin
        text = str(what)
    if isinstance(text, unicode):
        return text
    return unicode(text, ENCODING)
51
class Node(object):
    """
    Node class describes the syntax of a single XML node. Its constructor
    permits node creation from a "namespace name" tag, a set of attributes and
    a payload of text strings and other nodes. It does not natively support
    building a node from a text string and uses the NodeBuilder class for that
    purpose. After creation a node can be changed in many ways, up to being
    completely rewritten. A node can also be serialised into a string in one
    of two modes: default (where the textual representation describes the node
    exactly) and "fancy" - with whitespace added for indentation, to make the
    result more readable by a human.

    The class attribute FORCE_NODE_RECREATION defaults to a false value, which
    enables fast node replication from another node. The drawback of the fast
    way is that the new node shares its child nodes, its attribute values and
    its parent with the "original" node, so changing one node may influence
    the other. Setting it to a true value makes the constructor serialise the
    source node and parse it again instead.

    Attributes set by the constructor:
      name      - tag name without prefix
      namespace - namespace URI of the node
      attrs     - dictionary of attributes (including xmlns declarations)
      data      - list of CDATA chunks; chunk i precedes child i
      kids      - list of child nodes
      parent    - parent node or None
      nsd       - namespace declarations made on this node (prefix -> URI)
      nsp_cache - cache of prefix lookups resolved through the parents
    """

    FORCE_NODE_RECREATION = 0

    # NOTE: the {} and [] defaults used throughout this class are only ever
    # read, never mutated, so sharing the default objects between calls is
    # harmless. They are kept to leave the signatures unchanged.
    def __init__(self, tag=None, attrs={}, payload=[], parent=None, nsp=None,
                 node_built=False, node=None):
        """
        Takes "tag" argument as the name of node (prepended by namespace, if
        needed and separated from it by a space), attrs dictionary as the set
        of arguments, payload list as the set of textual strings and child
        nodes that this node carries within itself and "parent" argument that
        is another node that this one will be the child of. "nsp" is an
        optional prefix -> URI dictionary used to pre-fill the namespace
        lookup cache. When "node_built" is true, "tag" is treated as a
        "prefix:name" pair whose prefix is resolved with lookup_nsp (this is
        how NodeBuilder passes tags). Also the __init__ can be provided with
        "node" argument that is either a text string containing exactly one
        node or another Node instance to begin with. If both "node" and other
        arguments are provided then the node is initially created as a replica
        of the "node" provided and then modified to be compliant with the
        other arguments.
        """
        if node:
            if self.FORCE_NODE_RECREATION and isinstance(node, Node):
                node = str(node)
            if not isinstance(node, Node):
                # Textual source: NodeBuilder parses it straight into self
                node = NodeBuilder(node, self)
                node_built = True
            else:
                # Shallow replica: the containers are new, their items shared
                self.name = node.name
                self.namespace = node.namespace
                self.attrs = dict(node.attrs)
                self.data = list(node.data)
                self.kids = list(node.kids)
                self.parent = node.parent
                self.nsd = dict(node.nsd)
        else:
            self.name = 'tag'
            self.namespace = ''
            self.attrs = {}
            self.data = []
            self.kids = []
            self.parent = None
            self.nsd = {}
        if parent:
            self.parent = parent
        self.nsp_cache = {}
        if nsp:
            self.nsp_cache.update(nsp)
        for attr, val in attrs.items():
            # Namespace declarations are recorded in nsd and also kept as
            # ordinary attributes
            if attr == 'xmlns':
                self.nsd[u''] = val
            elif attr.startswith('xmlns:'):
                self.nsd[attr[6:]] = val
            self.attrs[attr] = val
        if tag:
            if node_built:
                # "prefix:name" or just "name" (empty prefix)
                pfx, self.name = ([''] + tag.split(':'))[-2:]
                self.namespace = self.lookup_nsp(pfx)
            elif ' ' in tag:
                self.namespace, self.name = tag.split()
            else:
                self.name = tag
        if isinstance(payload, basestring):
            payload = [payload]
        for item in payload:
            if isinstance(item, Node):
                self.addChild(node=item)
            else:
                self.data.append(ustr(item))

    def lookup_nsp(self, pfx=''):
        """
        Resolve namespace prefix "pfx" to its URI: first among the
        declarations of this node, then in the lookup cache, then by asking
        the parent (caching the answer). Without a parent the placeholder
        'http://www.gajim.org/xmlns/undeclared' is returned
        """
        ns = self.nsd.get(pfx, None)
        if ns is None:
            ns = self.nsp_cache.get(pfx, None)
        if ns is None:
            if self.parent:
                ns = self.parent.lookup_nsp(pfx)
                self.nsp_cache[pfx] = ns
            else:
                return 'http://www.gajim.org/xmlns/undeclared'
        return ns

    def _escaped_data(self, index, fancy):
        """
        Return the XML-escaped CDATA chunk number "index" (stripped of
        surrounding whitespace in fancy mode) or '' if there is no such chunk
        """
        if index >= len(self.data):
            return ''
        chunk = self.data[index]
        if fancy:
            chunk = chunk.strip()
        return XMLescape(chunk)

    def __str__(self, fancy=0):
        """
        Method used to dump node into textual representation. If "fancy"
        argument is set to True produces indented output for readability
        (the value doubles as the nesting level of the node)
        """
        indent = (fancy - 1) * 2 * ' '
        s = indent + "<" + self.name
        if self.namespace:
            # Declare the namespace only where it differs from the parent's
            # and is not already present among the attributes
            if not self.parent or self.parent.namespace != self.namespace:
                if 'xmlns' not in self.attrs:
                    s += ' xmlns="%s"' % self.namespace
        for key in self.attrs.keys():
            val = ustr(self.attrs[key])
            s += ' %s="%s"' % (key, XMLescape(val))
        s += ">"
        cnt = 0
        if self.kids:
            if fancy:
                s += "\n"
            for kid in self.kids:
                # CDATA chunk number cnt goes right before child number cnt
                s += self._escaped_data(cnt, fancy)
                if isinstance(kid, basestring):
                    s += kid.__str__()
                else:
                    s += kid.__str__(fancy and fancy + 1)
                cnt += 1
        # The chunk following the last child (or the only chunk of a leaf)
        s += self._escaped_data(cnt, fancy)
        if not self.kids and s.endswith('>'):
            # Nothing was emitted after the opening tag: make it self-closing
            s = s[:-1] + ' />'
            if fancy:
                s += "\n"
        else:
            if fancy and not self.data:
                s += indent
            s += "</" + self.name + ">"
            if fancy:
                s += "\n"
        return s

    def addChild(self, name=None, attrs={}, payload=[], namespace=None, node=None):
        """
        If "node" argument is provided, adds it as child node. Else creates
        new node from the other arguments' values and adds it as well.
        Returns the added node. Raises AttributeError if "attrs" contains an
        'xmlns' key
        """
        if 'xmlns' in attrs:
            raise AttributeError("Use namespace=x instead of attrs={'xmlns':x}")
        if node:
            newnode = node
            node.parent = self
        else:
            newnode = Node(tag=name, parent=self, attrs=attrs, payload=payload)
        if namespace:
            newnode.setNamespace(namespace)
        self.kids.append(newnode)
        return newnode

    def addData(self, data):
        """
        Add some CDATA to node
        """
        self.data.append(ustr(data))

    def clearData(self):
        """
        Remove all CDATA from the node
        """
        self.data = []

    def delAttr(self, key):
        """
        Delete an attribute "key". Raises KeyError if it is not set
        """
        del self.attrs[key]

    def delChild(self, node, attrs={}):
        """
        Delete the "node" from the node's childs list, if "node" is an
        instance. Else delete the first node that have specified name and
        (optionally) attributes. Returns the removed node; raises ValueError
        if there is nothing to remove
        """
        if not isinstance(node, Node):
            node = self.getTag(node, attrs)
        self.kids.remove(node)
        return node

    def getAttrs(self):
        """
        Return all node's attributes as dictionary (the live one, not a copy)
        """
        return self.attrs

    def getAttr(self, key):
        """
        Return value of specified attribute (None if it is not set)
        """
        return self.attrs.get(key)

    def getChildren(self):
        """
        Return all node's child nodes as list (the live one, not a copy)
        """
        return self.kids

    def getData(self):
        """
        Return all node CDATA as string (concatenated)
        """
        return ''.join(self.data)

    def getName(self):
        """
        Return the name of node
        """
        return self.name

    def getNamespace(self):
        """
        Return the namespace of node
        """
        return self.namespace

    def getParent(self):
        """
        Returns the parent of node (if present)
        """
        return self.parent

    def getPayload(self):
        """
        Return the payload of node i.e. list of child nodes and CDATA entries.
        F.e. for "<node>text1<nodea/><nodeb/> text2</node>" will be returned
        list: ['text1', <nodea instance>, <nodeb instance>, ' text2']
        """
        ret = []
        for i in range(max(len(self.data), len(self.kids))):
            # Empty CDATA chunks are placeholders only and are left out
            if i < len(self.data) and self.data[i]:
                ret.append(self.data[i])
            if i < len(self.kids):
                ret.append(self.kids[i])
        return ret

    def getTag(self, name, attrs={}, namespace=None):
        """
        Filter all child nodes using specified arguments as filter. Return the
        first found or None if not found
        """
        return self.getTags(name, attrs, namespace, one=1)

    def getTagAttr(self, tag, attr):
        """
        Return attribute value of the child with specified name (or None if no
        such child or attribute)
        """
        try:
            return self.getTag(tag).attrs[attr]
        except Exception:
            # Best effort, like getTagData. Deliberately not a bare except,
            # so KeyboardInterrupt and SystemExit are not swallowed
            return None

    def getTagData(self, tag):
        """
        Return concatenated CDATA of the child with specified name (or None
        if there is no such child)
        """
        try:
            return self.getTag(tag).getData()
        except Exception:
            return None

    @staticmethod
    def _matches(node, name, attrs):
        """
        Tell whether "node" is named "name" and carries every key/value pair
        of "attrs"
        """
        if node.getName() != name:
            return False
        for key in attrs.keys():
            if key not in node.attrs or node.attrs[key] != attrs[key]:
                return False
        return True

    def getTags(self, name, attrs={}, namespace=None, one=0):
        """
        Filter all child nodes using specified arguments as filter. Returns
        the list of nodes found or, if "one" is true, the first node found
        (None if there is none). A false "namespace" disables the namespace
        filter
        """
        nodes = []
        for node in self.kids:
            if namespace and namespace != node.getNamespace():
                continue
            if self._matches(node, name, attrs):
                if one:
                    return node
                nodes.append(node)
        if one:
            return None
        return nodes

    def iterTags(self, name, attrs={}, namespace=None):
        """
        Iterate over all children using specified arguments as filter. Unlike
        getTags, only namespace=None disables the namespace filter
        """
        for node in self.kids:
            if namespace is not None and namespace != node.getNamespace():
                continue
            if self._matches(node, name, attrs):
                yield node

    def setAttr(self, key, val):
        """
        Set attribute "key" with the value "val"
        """
        self.attrs[key] = val

    def setData(self, data):
        """
        Set node's CDATA to provided string. Resets all previous CDATA!
        """
        self.data = [ustr(data)]

    def setName(self, val):
        """
        Change the node name
        """
        self.name = val

    def setNamespace(self, namespace):
        """
        Changes the node namespace
        """
        self.namespace = namespace

    def setParent(self, node):
        """
        Set node's parent to "node". WARNING: does not check if a parent is
        already present and does not remove the node from the list of childs
        of the previous parent
        """
        self.parent = node

    def setPayload(self, payload, add=0):
        """
        Set the list of node's childs to "payload" (a single string is
        wrapped into a list) or, if "add" is true, extend the list with it.
        WARNING: without "add" all previous childs are dropped; CDATA and the
        parent links of the new items are left untouched. If you wish just to
        add child or CDATA - use addData or addChild methods
        """
        if isinstance(payload, basestring):
            payload = [payload]
        if add:
            self.kids += payload
        else:
            self.kids = payload

    def setTag(self, name, attrs={}, namespace=None):
        """
        Same as getTag but if the node with specified namespace/attributes not
        found, creates such node and returns it
        """
        node = self.getTags(name, attrs, namespace=namespace, one=1)
        if node:
            return node
        return self.addChild(name, attrs, namespace=namespace)

    def setTagAttr(self, tag, attr, val):
        """
        Create new node (if not already present) with name "tag" and set it's
        attribute "attr" to value "val"
        """
        try:
            self.getTag(tag).attrs[attr] = val
        except Exception:
            self.addChild(tag, attrs={attr: val})

    def setTagData(self, tag, val, attrs={}):
        """
        Creates new node (if not already present) with name "tag" and
        (optionally) attributes "attrs" and sets it's CDATA to string "val"
        """
        try:
            self.getTag(tag, attrs).setData(ustr(val))
        except Exception:
            self.addChild(tag, attrs, payload=[ustr(val)])

    def has_attr(self, key):
        """
        Check if node have attribute "key"
        """
        return key in self.attrs

    def __getitem__(self, item):
        """
        Return node's attribute "item" value
        """
        return self.getAttr(item)

    def __setitem__(self, item, val):
        """
        Set node's attribute "item" value
        """
        return self.setAttr(item, val)

    def __delitem__(self, item):
        """
        Delete node's attribute "item"
        """
        return self.delAttr(item)

    def __contains__(self, item):
        """
        Check if node has attribute "item"
        """
        return self.has_attr(item)

    def __getattr__(self, attr):
        """
        Reduce memory usage caused by T/NT classes - use memory only when
        needed: the helpers are created on first access and then stored on
        the instance. Any other missing attribute raises AttributeError
        """
        if attr == 'T':
            self.T = T(self)
            return self.T
        if attr == 'NT':
            self.NT = NT(self)
            return self.NT
        raise AttributeError(attr)
466
class T:
    """
    Auxiliary class giving attribute-style access to the child nodes of the
    wrapped node: reading an attribute returns the child of that name
    (creating it when missing), assigning sets its content and deleting
    removes it
    """

    def __init__(self, node):
        # Written through __dict__ because __setattr__ is overridden below
        self.__dict__['node'] = node

    def __getattr__(self, attr):
        wrapped = self.node
        return wrapped.setTag(attr)

    def __setattr__(self, attr, val):
        wrapped = self.node
        if not isinstance(val, Node):
            return wrapped.setTagData(attr, val)
        # Re-initialise the (possibly just created) child as a replica of val
        child = wrapped.setTag(attr)
        Node.__init__(child, node=val)

    def __delattr__(self, attr):
        wrapped = self.node
        return wrapped.delChild(attr)
486
class NT(T):
    """
    Auxiliary class used to quickly create child nodes of the wrapped node:
    unlike T, every attribute read or assignment adds a new child
    """

    def __getattr__(self, attr):
        wrapped = self.node
        return wrapped.addChild(attr)

    def __setattr__(self, attr, val):
        wrapped = self.node
        if not isinstance(val, Node):
            return wrapped.addChild(attr, payload=[val])
        # A ready-made node is attached as it is
        wrapped.addChild(attr, node=val)
500
class NodeBuilder:
    """
    Builds a Node class minidom from data passed to it. This class is used
    for two purposes:

      1. Creation of an XML Node from a textual representation. F.e. reading
         a config file. See the XML2Node function.
      2. Handling an incoming XML stream. This is done by changing the
         _dispatch_depth attribute and redefining the dispatch method.

    You do not need to use this class directly if you are not designing your
    own XML handler
    """

    def __init__(self, data=None, initial_node=None):
        """
        Take two optional parameters: "data" and "initial_node"

        By default the class is initialised without a node. Though, if
        "initial_node" is provided it is used as the "starting point". You can
        think about it as of "node upgrade". "data" (if provided) is fed to
        the parser, as a complete document, immediately after instance init.
        """
        log.debug("Preparing to handle incoming XML stream.")
        self._parser = xml.parsers.expat.ParserCreate()
        self._parser.StartElementHandler = self.starttag
        self._parser.EndElementHandler = self.endtag
        self._parser.StartNamespaceDeclHandler = self.handle_namespace_start
        self._parser.CharacterDataHandler = self.handle_cdata
        self._parser.buffer_text = True
        # Exposed so that callers can feed the parser incrementally
        self.Parse = self._parser.Parse

        self.__depth = 0
        self.__last_depth = 0
        self.__max_depth = 0
        # Nodes completed at this depth are handed over to dispatch()
        self._dispatch_depth = 1
        self._document_attrs = None
        self._document_nsp = None
        self._mini_dom = initial_node
        self.last_is_data = 1
        # Node currently being filled
        self._ptr = None
        self.data_buffer = None
        self.streamError = ''
        if data:
            self._parser.Parse(data, 1)

    def check_data_buffer(self):
        """
        Flush the pending character data, if any, into the current node as a
        single CDATA chunk
        """
        if self.data_buffer:
            self._ptr.data.append(''.join(self.data_buffer))
            del self.data_buffer[:]
            self.data_buffer = None

    def destroy(self):
        """
        Method used to allow class instance to be garbage-collected: flushes
        pending data and detaches the handlers from the parser
        """
        self.check_data_buffer()
        self._parser.StartElementHandler = None
        self._parser.EndElementHandler = None
        self._parser.CharacterDataHandler = None
        self._parser.StartNamespaceDeclHandler = None

    def starttag(self, tag, attrs):
        """
        XML Parser callback. Used internally
        """
        self.check_data_buffer()
        self._inc_depth()
        log.info("STARTTAG.. DEPTH -> %i , tag -> %s, attrs -> %s",
                 self.__depth, tag, repr(attrs))
        if self.__depth == self._dispatch_depth:
            # A new dispatchable node starts here
            if not self._mini_dom:
                self._mini_dom = Node(tag=tag, attrs=attrs,
                                      nsp=self._document_nsp, node_built=True)
            else:
                # Re-initialise the existing node in place ("node upgrade")
                Node.__init__(self._mini_dom, tag=tag, attrs=attrs,
                              nsp=self._document_nsp, node_built=True)
            self._ptr = self._mini_dom
        elif self.__depth > self._dispatch_depth:
            self._ptr.kids.append(Node(tag=tag, parent=self._ptr, attrs=attrs,
                                       node_built=True))
            self._ptr = self._ptr.kids[-1]
        if self.__depth == 1:
            # Root element: remember its attributes and namespace
            # declarations and report the stream header
            self._document_attrs = {}
            self._document_nsp = {}
            nsp, name = ([''] + tag.split(':'))[-2:]
            for attr, val in attrs.items():
                if attr == 'xmlns':
                    self._document_nsp[u''] = val
                elif attr.startswith('xmlns:'):
                    self._document_nsp[attr[6:]] = val
                else:
                    self._document_attrs[attr] = val
            ns = self._document_nsp.get(
                nsp, 'http://www.gajim.org/xmlns/undeclared-root')
            try:
                self.stream_header_received(ns, name, attrs)
            except ValueError as e:
                self._document_attrs = None
                raise ValueError(str(e))
        if not self.last_is_data and self._ptr.parent:
            # No text preceded this element: add an empty chunk so that the
            # parent's data stays aligned with its kids
            self._ptr.parent.data.append('')
        self.last_is_data = 0

    def endtag(self, tag):
        """
        XML Parser callback. Used internally
        """
        log.info("DEPTH -> %i , tag -> %s", self.__depth, tag)
        self.check_data_buffer()
        if self.__depth == self._dispatch_depth:
            if self._mini_dom.getName() == 'error':
                # Remember the error condition: the name of the first child
                # or, without childs, the text of the node
                children = self._mini_dom.getChildren()
                if children:
                    self.streamError = children[0].getName()
                else:
                    self.streamError = self._mini_dom.getData()
            self.dispatch(self._mini_dom)
        elif self.__depth > self._dispatch_depth:
            self._ptr = self._ptr.parent
        else:
            log.info("Got higher than dispatch level. Stream terminated?")
        self._dec_depth()
        self.last_is_data = 0
        if self.__depth == 0:
            self.stream_footer_received()

    def handle_cdata(self, data):
        """
        XML Parser callback. Used internally. Collects character data in
        data_buffer until check_data_buffer flushes it
        """
        if self.last_is_data:
            if self.data_buffer:
                self.data_buffer.append(data)
        elif self._ptr:
            # First text since the last tag: start a new buffer
            self.data_buffer = [data]
            self.last_is_data = 1

    def handle_namespace_start(self, prefix, uri):
        """
        XML Parser callback. Used internally
        """
        self.check_data_buffer()

    def getDom(self):
        """
        Return just built Node
        """
        self.check_data_buffer()
        return self._mini_dom

    def dispatch(self, stanza):
        """
        Get called when the NodeBuilder reaches some level of depth on it's
        way up with the built node as argument. Can be redefined to convert
        incoming XML stanzas to program events
        """
        pass

    def stream_header_received(self, ns, tag, attrs):
        """
        Method called when stream just opened
        """
        self.check_data_buffer()

    def stream_footer_received(self):
        """
        Method called by endtag when the root element is closed, i.e. when
        the depth returns to 0
        """
        self.check_data_buffer()

    def has_received_endtag(self, level=0):
        """
        Return True if at least one end tag was seen (at level)
        """
        return self.__depth <= level and self.__max_depth > level

    def _inc_depth(self):
        """
        Go one level deeper, keeping track of the previous and the maximum
        depth
        """
        self.__last_depth = self.__depth
        self.__depth += 1
        self.__max_depth = max(self.__depth, self.__max_depth)

    def _dec_depth(self):
        """
        Go one level up, keeping track of the previous depth
        """
        self.__last_depth = self.__depth
        self.__depth -= 1
677
def XML2Node(xml):
    """
    Convert supplied textual string into XML node. Handy f.e. for reading
    configuration file. Raises xml.parsers.expat.ExpatError if provided
    string is not well-formed XML
    """
    builder = NodeBuilder(xml)
    return builder.getDom()
685
def BadXML2Node(xml):
    """
    Convert supplied textual string into XML node. Survives if xml data is
    cut half way round. I.e. "<html>some text <br>some more text". Will raise
    xml.parsers.expat.ExpatError on misplaced tags though. F.e. "<b>some text
    <br>some more text</b>" will not work
    """
    builder = NodeBuilder()
    if xml:
        # Feed the text without the "final" flag: NodeBuilder(xml) parses it
        # as a complete document, so expat would reject the still open
        # elements of a truncated input
        builder.Parse(xml, 0)
    return builder.getDom()
694