1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 """
18 Simplexml module provides the xmpppy library with all the tools needed to
19 handle XML nodes and XML streams. I'm personally using it in many other
20 separate projects. It is designed to be as standalone as possible.
21 """
22
23 import xml.parsers.expat
24 import logging
25 log = logging.getLogger('nbxmpp.simplexml')
26
def XMLescape(txt):
    """
    Return provided string with symbols & < > " replaced by their respective XML
    entities

    The form feed (U+000C) and escape (U+001B) characters are removed
    entirely; both lie outside the character range XML 1.0 permits.

    txt -- the text to escape (str or unicode)
    Returns a new string; "txt" itself is not modified.
    """
    # "&" has to be handled first: the other replacements introduce new
    # ampersands, which must not be escaped a second time.
    for char, entity_name in (("&", "amp"), ("<", "lt"), (">", "gt"), ('"', "quot")):
        txt = txt.replace(char, "&" + entity_name + ";")
    return txt.replace(u'\x0C', "").replace(u'\x1B', "")
34
35 ENCODING='utf-8'
36
38 """
39 Converts object "what" to a unicode string using its own __str__ method if
40 accessible, or the str() builtin otherwise
41 """
42 if isinstance(what, unicode):
43 return what
44 try:
45 r = what.__str__()
46 except AttributeError:
47 r = str(what)
48 if not isinstance(r, unicode):
49 return unicode(r, ENCODING)
50 return r
51
53 """
54 Node class describes the syntax of a single XML node. It has a constructor
55 that permits node creation from a "namespace name" tag, attributes and a
56 payload of text strings and other nodes. It does not natively support
57 building a node from a text string and uses the NodeBuilder class for that
58 purpose. After creation a node can be mangled in many ways, so it can be
59 completely changed. A node can also be serialised into a string in one of
60 two modes: default (where the textual representation describes the node
61 exactly) and "fancy" - with whitespace added for indentation, which makes
62 the result more readable by a human.
63
64 Node class has an attribute FORCE_NODE_RECREATION that defaults to False,
65 thus enabling fast node replication from some other node. The drawback of
66 the fast way is that the new node shares some info with the "original" node,
67 so changing one node may influence the other. A full copy is rarely needed,
68 though (in xmpppy it is never needed at all, since I usually never use the
69 original node after replication and use replication only to move upwards
70 on the class tree).
71 """
72
73 FORCE_NODE_RECREATION = 0
74
def __init__(self, tag=None, attrs=None, payload=None, parent=None, nsp=None,
        node_built=False, node=None):
    """
    Takes "tag" argument as the name of node (prepended by namespace, if
    needed and separated from it by a space), attrs dictionary as the set of
    arguments, payload list as the set of textual strings and child nodes
    that this node carries within itself and "parent" argument that is
    another node that this one will be the child of. Also the __init__ can be
    provided with "node" argument that is either a text string containing
    exactly one node or another Node instance to begin with. If both "node"
    and other arguments are provided then the node is initially created as a
    replica of "node" and then modified to be compliant with the other
    arguments.

    tag        -- node name; "namespace name" form, or "prefix:name" when
                  node_built is true
    attrs      -- dict of attributes; "xmlns" and "xmlns:*" entries are also
                  recorded as namespace declarations (None means no attributes)
    payload    -- a single string, or an iterable of strings and Node
                  instances (None means empty payload)
    parent     -- parent node; applied only when it tests true
    nsp        -- dict of prefix -> namespace used to pre-fill the lookup cache
    node_built -- true when "tag" comes from the parser and may carry a
                  namespace prefix that has to be resolved
    node       -- text or Node instance to replicate
    """
    # Use fresh containers per call rather than mutable default arguments,
    # which would be shared between all calls.
    if attrs is None:
        attrs = {}
    if payload is None:
        payload = []

    if node:
        if self.FORCE_NODE_RECREATION and isinstance(node, Node):
            # Forcing recreation: serialise and re-parse instead of sharing
            node = str(node)
        if not isinstance(node, Node):
            # Textual input: NodeBuilder receives "self" as its initial node
            node = NodeBuilder(node, self)
            node_built = True
        else:
            # Shallow replica: containers are new, but child nodes are shared
            self.name = node.name
            self.namespace = node.namespace
            self.attrs = dict(node.attrs)
            self.data = list(node.data)
            self.kids = list(node.kids)
            self.parent = node.parent
            self.nsd = dict(node.nsd)
    else:
        self.name, self.namespace, self.parent = 'tag', '', None
        self.attrs, self.data, self.kids, self.nsd = {}, [], [], {}

    if parent:
        self.parent = parent

    self.nsp_cache = {}
    if nsp:
        self.nsp_cache.update(nsp)

    for attr, val in attrs.items():
        # Namespace declarations are tracked in nsd *and* kept as attributes
        if attr == 'xmlns':
            self.nsd[u''] = val
        elif attr.startswith('xmlns:'):
            self.nsd[attr[6:]] = val
        self.attrs[attr] = val

    if tag:
        if node_built:
            # "prefix:name" -> (prefix, name); prefix is '' when absent
            pfx, self.name = ([''] + tag.split(':'))[-2:]
            self.namespace = self.lookup_nsp(pfx)
        elif ' ' in tag:
            self.namespace, self.name = tag.split()
        else:
            self.name = tag

    if isinstance(payload, basestring):
        payload = [payload]
    for item in payload:
        if isinstance(item, Node):
            self.addChild(node=item)
        else:
            self.data.append(ustr(item))
133
135 ns = self.nsd.get(pfx, None)
136 if ns is None:
137 ns = self.nsp_cache.get(pfx, None)
138 if ns is None:
139 if self.parent:
140 ns = self.parent.lookup_nsp(pfx)
141 self.nsp_cache[pfx] = ns
142 else:
143 return 'http://www.gajim.org/xmlns/undeclared'
144 return ns
145
147 """
148 Method used to dump node into textual representation. If "fancy" argument
149 is set to True produces indented output for readability
150 """
151 s = (fancy-1) * 2 * ' ' + "<" + self.name
152 if self.namespace:
153 if not self.parent or self.parent.namespace!=self.namespace:
154 if 'xmlns' not in self.attrs:
155 s = s + ' xmlns="%s"'%self.namespace
156 for key in self.attrs.keys():
157 val = ustr(self.attrs[key])
158 s = s + ' %s="%s"' % ( key, XMLescape(val) )
159 s = s + ">"
160 cnt = 0
161 if self.kids:
162 if fancy: s = s + "\n"
163 for a in self.kids:
164 if not fancy and (len(self.data)-1)>=cnt: s=s+XMLescape(self.data[cnt])
165 elif (len(self.data)-1)>=cnt: s=s+XMLescape(self.data[cnt].strip())
166 if isinstance(a, str) or isinstance(a, unicode):
167 s = s + a.__str__()
168 else:
169 s = s + a.__str__(fancy and fancy+1)
170 cnt=cnt+1
171 if not fancy and (len(self.data)-1) >= cnt: s = s + XMLescape(self.data[cnt])
172 elif (len(self.data)-1) >= cnt: s = s + XMLescape(self.data[cnt].strip())
173 if not self.kids and s.endswith('>'):
174 s=s[:-1]+' />'
175 if fancy: s = s + "\n"
176 else:
177 if fancy and not self.data: s = s + (fancy-1) * 2 * ' '
178 s = s + "</" + self.name + ">"
179 if fancy: s = s + "\n"
180 return s
181
def addChild(self, name=None, attrs=None, payload=None, namespace=None, node=None):
    """
    If "node" argument is provided, adds it as child node. Else creates new
    node from the other arguments' values and adds it as well

    name      -- tag name of the node to create (ignored when "node" is given)
    attrs     -- attribute dict for the new node (None means no attributes)
    payload   -- payload for the new node (None means empty payload)
    namespace -- if it tests true, set on the child via setNamespace()
    node      -- existing node to attach; its parent is set to this node

    Returns the child that was appended to this node's kids list.
    Raises AttributeError if "attrs" contains an 'xmlns' key.
    """
    # Use fresh containers per call rather than mutable default arguments,
    # which would be shared between all calls.
    if attrs is None:
        attrs = {}
    if payload is None:
        payload = []
    if 'xmlns' in attrs:
        raise AttributeError("Use namespace=x instead of attrs={'xmlns':x}")
    if node:
        newnode = node
        node.parent = self
    else:
        newnode = Node(tag=name, parent=self, attrs=attrs, payload=payload)
    if namespace:
        newnode.setNamespace(namespace)
    self.kids.append(newnode)
    return newnode
197
199 """
200 Add some CDATA to node
201 """
202 self.data.append(ustr(data))
203
205 """
206 Remove all CDATA from the node
207 """
208 self.data = []
209
211 """
212 Delete an attribute "key"
213 """
214 del self.attrs[key]
215
217 """
218 Delete the "node" from the node's childs list, if "node" is an instance.
219 Else delete the first node that have specified name and (optionally)
220 attributes
221 """
222 if not isinstance(node, Node):
223 node = self.getTag(node, attrs)
224 self.kids.remove(node)
225 return node
226
228 """
229 Return all node's attributes as dictionary
230 """
231 return self.attrs
232
234 """
235 Return value of specified attribute
236 """
237 return self.attrs.get(key)
238
240 """
241 Return all node's child nodes as list
242 """
243 return self.kids
244
246 """
247 Return all node CDATA as string (concatenated)
248 """
249 return ''.join(self.data)
250
252 """
253 Return the name of node
254 """
255 return self.name
256
258 """
259 Return the namespace of node
260 """
261 return self.namespace
262
264 """
265 Returns the parent of node (if present)
266 """
267 return self.parent
268
270 """
271 Return the payload of node i.e. list of child nodes and CDATA entries.
272 F.e. for "<node>text1<nodea/><nodeb/> text2</node>" will be returned
273 list: ['text1', <nodea instance>, <nodeb instance>, ' text2']
274 """
275 ret = []
276 for i in range(len(self.kids)+len(self.data)+1):
277 try:
278 if self.data[i]:
279 ret.append(self.data[i])
280 except IndexError:
281 pass
282 try:
283 ret.append(self.kids[i])
284 except IndexError:
285 pass
286 return ret
287
def getTag(self, name, attrs=None, namespace=None):
    """
    Filter all child nodes using specified arguments as filter. Return the
    first found or None if not found

    name      -- tag name to look for
    attrs     -- dict of attributes to filter on (None means no filter)
    namespace -- namespace to filter on, if any

    The search itself is delegated to getTags() with one=1.
    """
    # Avoid a mutable default argument shared between calls
    if attrs is None:
        attrs = {}
    return self.getTags(name, attrs, namespace, one=1)
294
296 """
297 Return attribute value of the child with specified name (or None if no
298 such attribute)
299 """
300 try:
301 return self.getTag(tag).attrs[attr]
302 except:
303 return None
304
306 """
307 Return concatenated CDATA of the child with specified name
308 """
309 try:
310 return self.getTag(tag).getData()
311 except Exception:
312 return None
313
333
348
350 """
351 Set attribute "key" with the value "val"
352 """
353 self.attrs[key] = val
354
356 """
357 Set node's CDATA to provided string. Resets all previous CDATA!
358 """
359 self.data = [ustr(data)]
360
362 """
363 Change the node name
364 """
365 self.name = val
366
368 """
369 Changes the node namespace
370 """
371 self.namespace = namespace
372
374 """
375 Set node's parent to "node". WARNING: does not check whether a parent is
376 already present and does not remove the node from the list of children of
377 the previous parent
378 """
379 self.parent = node
380
382 """
383 Set node payload according to the list specified. WARNING: completely
384 replaces all node's previous content. If you wish just to add child or
385 CDATA - use addData or addChild methods
386 """
387 if isinstance(payload, basestring):
388 payload = [payload]
389 if add:
390 self.kids += payload
391 else:
392 self.kids = payload
393
def setTag(self, name, attrs=None, namespace=None):
    """
    Same as getTag but if the node with specified namespace/attributes is not
    found, creates such node and returns it

    name      -- tag name to look for or create
    attrs     -- dict of attributes to match / give to the new node (None
                 means no attributes)
    namespace -- namespace to match / give to the new node
    """
    # Avoid a mutable default argument shared between calls
    if attrs is None:
        attrs = {}
    node = self.getTags(name, attrs, namespace=namespace, one=1)
    if node:
        return node
    return self.addChild(name, attrs, namespace=namespace)
404
406 """
407 Create new node (if not already present) with name "tag" and set its
408 attribute "attr" to value "val"
409 """
410 try:
411 self.getTag(tag).attrs[attr] = val
412 except Exception:
413 self.addChild(tag, attrs={attr: val})
414
416 """
417 Creates new node (if not already present) with name "tag" and
418 (optionally) attributes "attrs" and sets its CDATA to string "val"
419 """
420 try:
421 self.getTag(tag, attrs).setData(ustr(val))
422 except Exception:
423 self.addChild(tag, attrs, payload = [ustr(val)])
424
426 """
427 Check if node have attribute "key"
428 """
429 return key in self.attrs
430
432 """
433 Return node's attribute "item" value
434 """
435 return self.getAttr(item)
436
438 """
439 Set node's attribute "item" value
440 """
441 return self.setAttr(item, val)
442
444 """
445 Delete node's attribute "item"
446 """
447 return self.delAttr(item)
448
450 """
451 Check if node has attribute "item"
452 """
453 return self.has_attr(item)
454
456 """
457 Reduce memory usage caused by T/NT classes - use memory only when needed
458 """
459 if attr == 'T':
460 self.T = T(self)
461 return self.T
462 if attr == 'NT':
463 self.NT = NT(self)
464 return self.NT
465 raise AttributeError
466
468 """
469 Auxiliary class used to quick access to node's child nodes
470 """
471
473 self.__dict__['node'] = node
474
476 return self.node.setTag(attr)
477
483
486
488 """
489 Auxiliary class used to quick create node's child nodes
490 """
491
494
496 if isinstance(val, Node):
497 self.node.addChild(attr, node=val)
498 else:
499 return self.node.addChild(attr, payload=[val])
500
502 """
503 Builds a Node class minidom from the data passed to it. This class is used
504 for two purposes:
505
506 1. Creating an XML Node from a textual representation, e.g. when reading a
507 config file. See the XML2Node function.
508 2. Handling an incoming XML stream. This is done by adjusting the
509 _dispatch_depth parameter and redefining the dispatch method.
510
511 You do not need to use this class directly unless you are designing your own
512 XML handler
513 """
514
def __init__(self, data=None, initial_node=None):
    """
    Set up the expat parser and the builder state

    Both parameters are optional. "initial_node", when given, is used as the
    node to be (re)filled by the parser instead of creating a new Node - a
    kind of "node upgrade". "data", when given, is passed to the parser right
    away with the final flag set.
    """
    log.debug("Preparing to handle incoming XML stream.")

    # Wire the expat callbacks to this instance
    parser = xml.parsers.expat.ParserCreate()
    parser.StartElementHandler = self.starttag
    parser.EndElementHandler = self.endtag
    parser.StartNamespaceDeclHandler = self.handle_namespace_start
    parser.CharacterDataHandler = self.handle_cdata
    parser.buffer_text = True
    self._parser = parser
    self.Parse = parser.Parse

    # Depth bookkeeping
    self.__depth = 0
    self.__last_depth = 0
    self.__max_depth = 0
    self._dispatch_depth = 1

    # Document-level information, filled in once the root element is seen
    self._document_attrs = None
    self._document_nsp = None

    # Node under construction and character data state
    self._mini_dom = initial_node
    self._ptr = None
    self.last_is_data = 1
    self.data_buffer = None
    self.streamError = ''

    if data:
        parser.Parse(data, 1)
546
548 if self.data_buffer:
549 self._ptr.data.append(''.join(self.data_buffer))
550 del self.data_buffer[:]
551 self.data_buffer = None
552
554 """
555 Method used to allow class instance to be garbage-collected
556 """
557 self.check_data_buffer()
558 self._parser.StartElementHandler = None
559 self._parser.EndElementHandler = None
560 self._parser.CharacterDataHandler = None
561 self._parser.StartNamespaceDeclHandler = None
562
564 """
565 XML Parser callback. Used internally
566 """
567 self.check_data_buffer()
568 self._inc_depth()
569 log.info("STARTTAG.. DEPTH -> %i , tag -> %s, attrs -> %s" % (self.__depth, tag, `attrs`))
570 if self.__depth == self._dispatch_depth:
571 if not self._mini_dom :
572 self._mini_dom = Node(tag=tag, attrs=attrs, nsp = self._document_nsp, node_built=True)
573 else:
574 Node.__init__(self._mini_dom, tag=tag, attrs=attrs, nsp = self._document_nsp, node_built=True)
575 self._ptr = self._mini_dom
576 elif self.__depth > self._dispatch_depth:
577 self._ptr.kids.append(Node(tag=tag, parent=self._ptr, attrs=attrs, node_built=True))
578 self._ptr = self._ptr.kids[-1]
579 if self.__depth == 1:
580 self._document_attrs = {}
581 self._document_nsp = {}
582 nsp, name = (['']+tag.split(':'))[-2:]
583 for attr, val in attrs.items():
584 if attr == 'xmlns':
585 self._document_nsp[u''] = val
586 elif attr.startswith('xmlns:'):
587 self._document_nsp[attr[6:]] = val
588 else:
589 self._document_attrs[attr] = val
590 ns = self._document_nsp.get(nsp, 'http://www.gajim.org/xmlns/undeclared-root')
591 try:
592 self.stream_header_received(ns, name, attrs)
593 except ValueError, e:
594 self._document_attrs = None
595 raise ValueError(str(e))
596 if not self.last_is_data and self._ptr.parent:
597 self._ptr.parent.data.append('')
598 self.last_is_data = 0
599
601 """
602 XML Parser callback. Used internally
603 """
604 log.info("DEPTH -> %i , tag -> %s" % (self.__depth, tag))
605 self.check_data_buffer()
606 if self.__depth == self._dispatch_depth:
607 if self._mini_dom.getName() == 'error':
608 children = self._mini_dom.getChildren()
609 if children:
610 self.streamError = children[0].getName()
611 else:
612 self.streamError = self._mini_dom.getData()
613 self.dispatch(self._mini_dom)
614 elif self.__depth > self._dispatch_depth:
615 self._ptr = self._ptr.parent
616 else:
617 log.info("Got higher than dispatch level. Stream terminated?")
618 self._dec_depth()
619 self.last_is_data = 0
620 if self.__depth == 0: self.stream_footer_received()
621
623 if self.last_is_data:
624 if self.data_buffer:
625 self.data_buffer.append(data)
626 elif self._ptr:
627 self.data_buffer = [data]
628 self.last_is_data = 1
629
631 """
632 XML Parser callback. Used internally
633 """
634 self.check_data_buffer()
635
637 """
638 Return just built Node
639 """
640 self.check_data_buffer()
641 return self._mini_dom
642
644 """
645 Gets called when the NodeBuilder reaches some level of depth on its way
646 up, with the built node as argument. Can be redefined to convert incoming
647 XML stanzas to program events
648 """
649 pass
650
652 """
653 Method called when stream just opened
654 """
655 self.check_data_buffer()
656
662
664 """
665 Return True if at least one end tag was seen (at level)
666 """
667 return self.__depth <= level and self.__max_depth > level
668
670 self.__last_depth = self.__depth
671 self.__depth += 1
672 self.__max_depth = max(self.__depth, self.__max_depth)
673
675 self.__last_depth = self.__depth
676 self.__depth -= 1
677
679 """
680 Convert supplied textual string into XML node. Handy e.g. for reading a
681 configuration file. Raises xml.parsers.expat.ExpatError if the provided
682 string is not well-formed XML
683 """
684 return NodeBuilder(xml).getDom()
685
687 """
688 Convert supplied textual string into XML node. Survives if the xml data is
689 cut off half way through, e.g. "<html>some text <br>some more text". Will
690 raise xml.parsers.expat.ExpatError on misplaced tags though, e.g. "<b>some
691 text <br>some more text</b>" will not work
692 """
693 return NodeBuilder(xml).getDom()
694