Package CedarBackup3 :: Module xmlutil
[hide private]
[frames] | no frames]

Source Code for Module CedarBackup3.xmlutil

  1  # -*- coding: iso-8859-1 -*- 
  2  # vim: set ft=python ts=3 sw=3 expandtab: 
  3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  4  # 
  5  #              C E D A R 
  6  #          S O L U T I O N S       "Software done right." 
  7  #           S O F T W A R E 
  8  # 
  9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 10  # 
 11  # Copyright (c) 2004-2006,2010,2015 Kenneth J. Pronovici. 
 12  # All rights reserved. 
 13  # 
 14  # Portions Copyright (c) 2000 Fourthought Inc, USA. 
 15  # All Rights Reserved. 
 16  # 
 17  # This program is free software; you can redistribute it and/or 
 18  # modify it under the terms of the GNU General Public License, 
 19  # Version 2, as published by the Free Software Foundation. 
 20  # 
 21  # This program is distributed in the hope that it will be useful, 
 22  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 23  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
 24  # 
 25  # Copies of the GNU General Public License are available from 
 26  # the Free Software Foundation website, http://www.gnu.org/. 
 27  # 
 28  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 29  # 
 30  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
 31  # Language : Python 3 (>= 3.4) 
 32  # Project  : Cedar Backup, release 3 
 33  # Purpose  : Provides general XML-related functionality. 
 34  # 
 35  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 36   
 37  ######################################################################## 
 38  # Module documentation 
 39  ######################################################################## 
 40   
 41  """ 
 42  Provides general XML-related functionality. 
 43   
 44  What I'm trying to do here is abstract much of the functionality that directly 
 45  accesses the DOM tree.  This is not so much to "protect" the other code from 
 46  the DOM, but to standardize the way it's used.  It will also help extension 
 47  authors write code that easily looks more like the rest of Cedar Backup. 
 48   
 49  @sort: createInputDom, createOutputDom, serializeDom, isElement, readChildren, 
 50         readFirstChild, readStringList, readString, readInteger, readBoolean, 
 51         addContainerNode, addStringNode, addIntegerNode, addBooleanNode, 
 52         TRUE_BOOLEAN_VALUES, FALSE_BOOLEAN_VALUES, VALID_BOOLEAN_VALUES 
 53   
 54  @var TRUE_BOOLEAN_VALUES: List of boolean values in XML representing C{True}. 
 55  @var FALSE_BOOLEAN_VALUES: List of boolean values in XML representing C{False}. 
 56  @var VALID_BOOLEAN_VALUES: List of valid boolean values in XML. 
 57   
 58  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
 59  """ 
 60  # pylint: disable=C0111,C0103,W0511,W0104,W0106 
 61   
 62  ######################################################################## 
 63  # Imported modules 
 64  ######################################################################## 
 65   
 66  # System modules 
 67  import sys 
 68  import re 
 69  import logging 
 70  from io import StringIO 
 71   
 72  # XML-related modules 
 73  from xml.parsers.expat import ExpatError 
 74  from xml.dom.minidom import Node 
 75  from xml.dom.minidom import getDOMImplementation 
 76  from xml.dom.minidom import parseString 
 77   
 78   
 79  ######################################################################## 
 80  # Module-wide constants and variables 
 81  ######################################################################## 
 82   
 83  logger = logging.getLogger("CedarBackup3.log.xml") 
 84   
 85  TRUE_BOOLEAN_VALUES   = [ "Y", "y", ] 
 86  FALSE_BOOLEAN_VALUES  = [ "N", "n", ] 
 87  VALID_BOOLEAN_VALUES  = TRUE_BOOLEAN_VALUES + FALSE_BOOLEAN_VALUES 
 88   
 89   
 90  ######################################################################## 
 91  # Functions for creating and parsing DOM trees 
 92  ######################################################################## 
 93   
def createInputDom(xmlData, name="cb_config"):
   """
   Creates a DOM tree based on reading an XML string.

   @param xmlData: XML document contents to parse (handed to C{parseString}).
   @param name: Assumed base name of the document (root node name).

   @return: Tuple (xmlDom, parentNode) for the parsed document.  The
            C{parentNode} is whatever L{readFirstChild} finds for C{name}
            directly beneath the document, so it is C{None} when there is no
            such node.

   @raise ValueError: If the document can't be parsed.
   """
   try:
      xmlDom = parseString(xmlData)
      parentNode = readFirstChild(xmlDom, name)
      return (xmlDom, parentNode)
   except (IOError, ExpatError) as e:
      # Chain explicitly so the parser's own error is kept as __cause__
      raise ValueError("Unable to parse XML document: %s" % e) from e
107
def createOutputDom(name="cb_config"):
   """
   Builds a new, empty DOM document that can be filled in and written as XML.

   @param name: Base name of the document (root node name).

   @return: Tuple (xmlDom, parentNode) for the new document, where
            C{parentNode} is the document's root element.
   """
   document = getDOMImplementation().createDocument(None, name, None)
   rootElement = document.documentElement
   return (document, rootElement)


########################################################################
# Functions for reading values out of XML documents
########################################################################

def isElement(node):
   """
   Indicates whether an XML node is an element node.

   @param node: DOM node to check.

   @return: C{True} if the node's type is C{Node.ELEMENT_NODE}, C{False} otherwise.
   """
   nodeType = node.nodeType
   return nodeType == Node.ELEMENT_NODE
128
def readChildren(parent, name):
   """
   Returns a list of nodes with a given name immediately beneath the parent.

   "Immediately beneath" means that only direct children of the passed-in
   parent node are returned.

   The lookup is done with C{getElementsByTagName}, which returns matching
   nodes at every level below the parent.  That result is then filtered down
   to the nodes whose C{parentNode} is the passed-in parent itself.

   @param parent: Parent node to search beneath (may be C{None}).
   @param name: Name of nodes to search for.

   @return: List of child nodes with correct parent, or an empty list if
            no matching nodes are found or the parent is C{None}.
   """
   if parent is None:
      return []
   return [candidate for candidate in parent.getElementsByTagName(name) if candidate.parentNode is parent]
156
def readFirstChild(parent, name):
   """
   Returns the first child with a given name immediately beneath the parent.

   "Immediately beneath" means that only direct children of the passed-in
   parent node are considered; see L{readChildren}.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: First properly-named child of parent, or C{None} if no matching nodes are found.
   """
   matches = readChildren(parent, name)
   return matches[0] if matches else None
173
def readStringList(parent, name):
   """
   Returns a list of the string contents associated with nodes with a given
   name immediately beneath the parent.

   "Immediately beneath" means that only direct children of the passed-in
   parent node are considered.

   The matching nodes are found with L{readChildren}.  For each of them, the
   string contents are taken from the node's first C{TEXT_NODE} child.  A
   matching node with no C{TEXT_NODE} child contributes nothing, so the
   result can be shorter than the list of matching nodes.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: List of strings as described above, or C{None} if there is
            nothing to put in the list.
   """
   values = []
   for element in readChildren(parent, name):
      textNodes = [child for child in element.childNodes if child.nodeType == Node.TEXT_NODE]
      if textNodes:
         values.append(textNodes[0].nodeValue)
   return values or None
205
def readString(parent, name):
   """
   Returns string contents of the first child with a given name immediately
   beneath the parent.

   This is the first entry of the list produced by L{readStringList}, so the
   same rules apply: only direct children of the parent are considered, and
   the contents of a node are taken from its first C{TEXT_NODE} child.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: String contents of node or C{None} if no matching nodes are found.
   """
   values = readStringList(parent, name)
   return None if values is None else values[0]
225
def readInteger(parent, name):
   """
   Returns integer contents of the first child with a given name immediately
   beneath the parent.

   The string is read with L{readString} and converted with C{int()}.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: Integer contents of node or C{None} if no matching nodes are found.
   @raise ValueError: If the string at the location can't be converted to an integer.
   """
   text = readString(parent, name)
   if text is not None:
      return int(text)
   return None
245
def readLong(parent, name):
   """
   Returns long integer contents of the first child with a given name
   immediately beneath the parent.

   The string is read with L{readString} and converted with C{int()}, exactly
   as L{readInteger} does.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: Long integer contents of node or C{None} if no matching nodes are found.
   @raise ValueError: If the string at the location can't be converted to an integer.
   """
   contents = readString(parent, name)
   return None if contents is None else int(contents)
265
def readFloat(parent, name):
   """
   Returns float contents of the first child with a given name immediately
   beneath the parent.

   The string is read with L{readString} and converted with C{float()}.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: Float contents of node or C{None} if no matching nodes are found.
   @raise ValueError: If the string at the location can't be converted to a
                      float value.
   """
   raw = readString(parent, name)
   if raw is None:
      return None
   return float(raw)
286
def readBoolean(parent, name):
   """
   Returns boolean contents of the first child with a given name immediately
   beneath the parent.

   The string is read with L{readString} and must be one of the values in
   L{VALID_BOOLEAN_VALUES}.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: Boolean contents of node or C{None} if no matching nodes are found.
   @raise ValueError: If the string at the location can't be converted to a boolean.
   """
   value = readString(parent, name)
   if value is None:
      return None
   if value in TRUE_BOOLEAN_VALUES:
      return True
   if value in FALSE_BOOLEAN_VALUES:
      return False
   raise ValueError("Boolean values must be one of %s." % VALID_BOOLEAN_VALUES)


########################################################################
# Functions for writing values into XML documents
########################################################################

def addContainerNode(xmlDom, parentNode, nodeName):
   """
   Appends a new, empty element to a parent node.

   @param xmlDom: DOM tree as from C{impl.createDocument()}.
   @param parentNode: Parent node to create child for.
   @param nodeName: Name of the new container node.

   @return: Reference to the newly-created node.
   """
   element = xmlDom.createElement(nodeName)
   parentNode.appendChild(element)
   return element
332
def addStringNode(xmlDom, parentNode, nodeName, nodeValue):
   """
   Adds a text node as the next child of a parent, to contain a string.

   The element itself is created with L{addContainerNode}.  If the
   C{nodeValue} is None, the element is still created, but it is left empty
   (i.e. it gets no text node child).

   @param xmlDom: DOM tree as from C{impl.createDocument()}.
   @param parentNode: Parent node to create child for.
   @param nodeName: Name of the new container node.
   @param nodeValue: The value to put into the node.

   @return: Reference to the newly-created node.
   """
   element = addContainerNode(xmlDom, parentNode, nodeName)
   if nodeValue is None:
      return element
   element.appendChild(xmlDom.createTextNode(nodeValue))
   return element
352
def addIntegerNode(xmlDom, parentNode, nodeName, nodeValue):
   """
   Adds a text node as the next child of a parent, to contain an integer.

   If the C{nodeValue} is None, then the node will be created, but will be
   empty (i.e. will contain no text node child).

   Otherwise the integer is formatted with "%d" and the resulting string is
   added to the document via L{addStringNode}.

   @param xmlDom: DOM tree as from C{impl.createDocument()}.
   @param parentNode: Parent node to create child for.
   @param nodeName: Name of the new container node.
   @param nodeValue: The value to put into the node.

   @return: Reference to the newly-created node.
   """
   text = None if nodeValue is None else "%d" % nodeValue
   return addStringNode(xmlDom, parentNode, nodeName, text)
374
def addLongNode(xmlDom, parentNode, nodeName, nodeValue):
   """
   Adds a text node as the next child of a parent, to contain a long integer.

   If the C{nodeValue} is None, then the node will be created, but will be
   empty (i.e. will contain no text node child).

   Otherwise the value is formatted with "%d" and the resulting string is
   added to the document via L{addStringNode}.

   @param xmlDom: DOM tree as from C{impl.createDocument()}.
   @param parentNode: Parent node to create child for.
   @param nodeName: Name of the new container node.
   @param nodeValue: The value to put into the node.

   @return: Reference to the newly-created node.
   """
   if nodeValue is not None:
      formatted = "%d" % nodeValue
      return addStringNode(xmlDom, parentNode, nodeName, formatted)
   return addStringNode(xmlDom, parentNode, nodeName, None)
396
def addBooleanNode(xmlDom, parentNode, nodeName, nodeValue):
   """
   Adds a text node as the next child of a parent, to contain a boolean.

   If the C{nodeValue} is None, then the node will be created, but will be
   empty (i.e. will contain no text node child).

   Any other value that Python treats as true is written as the string "Y",
   and any value that Python treats as false is written as the string "N".
   The result is added to the document via L{addStringNode}.

   @param xmlDom: DOM tree as from C{impl.createDocument()}.
   @param parentNode: Parent node to create child for.
   @param nodeName: Name of the new container node.
   @param nodeValue: The value to put into the node.

   @return: Reference to the newly-created node.
   """
   if nodeValue is None:
      text = None
   elif nodeValue:
      text = "Y"
   else:
      text = "N"
   return addStringNode(xmlDom, parentNode, nodeName, text)


########################################################################
# Functions for serializing DOM trees
########################################################################

def serializeDom(xmlDom, indent=3):
   """
   Serializes a DOM tree and returns the result in a string.

   The document is written into an in-memory buffer by a L{Serializer}
   configured for "UTF-8" output.

   @param xmlDom: XML DOM tree to serialize
   @param indent: Number of spaces to indent, as an integer

   @return: String form of DOM tree, pretty-printed.
   """
   # The with-block closes the buffer even if serialization raises
   with StringIO() as xmlBuffer:
      serializer = Serializer(xmlBuffer, "UTF-8", indent=indent)
      serializer.serialize(xmlDom)
      return xmlBuffer.getvalue()
441
class Serializer(object):

   """
   XML serializer class.

   This is a customized serializer that I hacked together based on what I found
   in the PyXML distribution.  Basically, around release 2.7.0, the only reason
   I still had around a dependency on PyXML was for the PrettyPrint
   functionality, and that seemed pointless.  So, I stripped the PrettyPrint
   code out of PyXML and hacked bits of it off until it did just what I needed
   and no more.

   This code started out being called PrintVisitor, but I decided it makes more
   sense just calling it a serializer.  I've made nearly all of the methods
   private, and I've added a new high-level serialize() method rather than
   having clients call C{visit()}.

   Anyway, as a consequence of my hacking with it, this can't quite be called a
   complete XML serializer any more.  I ripped out support for HTML and XHTML,
   and there is also no longer any support for namespaces (which I took out
   because this dragged along a lot of extra code, and Cedar Backup doesn't use
   namespaces).  However, everything else should pretty much work as expected.

   @copyright: This code, prior to customization, was part of the PyXML
   codebase, and before that was part of the 4DOM suite developed by
   Fourthought, Inc.  In its original form, it was Copyright (c) 2000
   Fourthought Inc, USA; All Rights Reserved.
   """

   # Maps each DOM node type to the name of the method that serializes it
   _VISITORS = {
      Node.ELEMENT_NODE: "_visitElement",
      Node.ATTRIBUTE_NODE: "_visitAttr",
      Node.TEXT_NODE: "_visitText",
      Node.CDATA_SECTION_NODE: "_visitCDATASection",
      Node.ENTITY_REFERENCE_NODE: "_visitEntityReference",
      Node.ENTITY_NODE: "_visitEntity",
      Node.PROCESSING_INSTRUCTION_NODE: "_visitProcessingInstruction",
      Node.COMMENT_NODE: "_visitComment",
      Node.DOCUMENT_NODE: "_visitDocument",
      Node.DOCUMENT_TYPE_NODE: "_visitDocumentType",
      Node.DOCUMENT_FRAGMENT_NODE: "_visitDocumentFragment",
      Node.NOTATION_NODE: "_visitNotation",
   }

   def __init__(self, stream=sys.stdout, encoding="UTF-8", indent=3):
      """
      Initialize a serializer.
      @param stream: Stream to write output to.
      @param encoding: Output encoding.
      @param indent: Number of spaces to indent, as an integer
      """
      self.stream = stream
      self.encoding = encoding
      self._indent = indent * " "   # indentation string for a single nesting level
      self._depth = 0               # current element nesting depth
      self._inText = 0              # non-zero right after text output; suppresses indentation

   def serialize(self, xmlDom):
      """
      Serialize the passed-in XML document, followed by a trailing newline.
      @param xmlDom: XML DOM tree to serialize
      @raise ValueError: If there's an unknown node type in the document.
      """
      self._visit(xmlDom)
      self.stream.write("\n")

   def _write(self, text):
      """Writes text to the stream after passing it through L{_encodeText}."""
      obj = _encodeText(text, self.encoding)
      self.stream.write(obj)

   def _tryIndent(self):
      """Starts a new, indented line unless indentation is off or text was just written."""
      if not self._inText and self._indent:
         self._write('\n' + self._indent*self._depth)

   def _visit(self, node):
      """
      Dispatches a node to the method that handles its node type.
      @raise ValueError: If there's an unknown node type in the document.
      """
      methodName = self._VISITORS.get(node.nodeType)
      if methodName is None:
         # It has a node type, but we don't know how to handle it
         raise ValueError("Unknown node type: %s" % repr(node))
      return getattr(self, methodName)(node)

   def _visitNodeList(self, node, exclude=None):
      """Visits every node in a node list, skipping the C{exclude} node if given."""
      for curr in node:
         if curr is not exclude:
            self._visit(curr)

   def _visitNamedNodeMap(self, node):
      """Visits every value held in a named node map."""
      for item in list(node.values()):
         self._visit(item)

   def _visitAttr(self, node):
      """Writes an attribute as C{ name=value}, escaped and quoted."""
      self._write(' ' + node.name)
      value = node.value
      text = _translateCDATA(value, self.encoding)
      text, delimiter = _translateCDATAAttr(text)
      self.stream.write("=%s%s%s" % (delimiter, text, delimiter))

   def _visitProlog(self):
      """Writes the XML declaration."""
      self._write("<?xml version='1.0' encoding='%s'?>" % (self.encoding or 'utf-8'))
      self._inText = 0

   def _visitDocument(self, node):
      """Writes the prolog, the doctype (if any) and then the remaining children."""
      self._visitProlog()
      if node.doctype:
         self._visitDocumentType(node.doctype)
      # The doctype was handled above, so leave it out of the child walk
      self._visitNodeList(node.childNodes, exclude=node.doctype)

   def _visitDocumentFragment(self, node):
      """Writes the children of a document fragment."""
      self._visitNodeList(node.childNodes)

   def _visitElement(self, node):
      """Writes an element, its attributes and its children."""
      self._tryIndent()
      self._write('<%s' % node.tagName)
      for attr in list(node.attributes.values()):
         self._visitAttr(attr)
      if len(node.childNodes):
         self._write('>')
         self._depth = self._depth + 1
         self._visitNodeList(node.childNodes)
         self._depth = self._depth - 1
         # Keep the closing tag on the same line as any text just written
         if not self._inText:
            self._tryIndent()
         self._write('</%s>' % node.tagName)
      else:
         self._write('/>')
      self._inText = 0

   def _visitText(self, node):
      """
      Writes the escaped contents of a text node.

      When indentation is enabled, a text node holding nothing but whitespace
      is skipped, so that it neither gets written nor sets C{_inText} (which
      would suppress indentation of the next element).  Previously the code
      called C{text.strip()} and discarded the result, which had no effect.
      Text that contains anything else is written unchanged.
      """
      text = node.data
      if self._indent and not text.strip():
         text = ""
      if text:
         text = _translateCDATA(text, self.encoding)
         self.stream.write(text)
         self._inText = 1

   def _visitDocumentType(self, doctype):
      """Writes a DOCTYPE declaration, if the doctype has a system or public id."""
      if not doctype.systemId and not doctype.publicId:
         return
      self._tryIndent()
      self._write('<!DOCTYPE %s' % doctype.name)
      # Use single quotes around an identifier that itself contains a double quote
      if doctype.systemId and '"' in doctype.systemId:
         system = "'%s'" % doctype.systemId
      else:
         system = '"%s"' % doctype.systemId
      if doctype.publicId and '"' in doctype.publicId:
         # We should probably throw an error
         # Valid characters:  <space> | <newline> | <linefeed> |
         #                    [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
         public = "'%s'" % doctype.publicId
      else:
         public = '"%s"' % doctype.publicId
      if doctype.publicId and doctype.systemId:
         self._write(' PUBLIC %s %s' % (public, system))
      elif doctype.systemId:
         self._write(' SYSTEM %s' % system)
      if doctype.entities or doctype.notations:
         self._write(' [')
         self._depth = self._depth + 1
         self._visitNamedNodeMap(doctype.entities)
         self._visitNamedNodeMap(doctype.notations)
         self._depth = self._depth - 1
         self._tryIndent()
         self._write(']>')
      else:
         self._write('>')
      self._inText = 0

   def _visitEntity(self, node):
      """Visited from a NamedNodeMap in DocumentType"""
      self._tryIndent()
      self._write('<!ENTITY %s' % (node.nodeName))
      if node.publicId:
         self._write(' PUBLIC %s' % node.publicId)
      if node.systemId:
         self._write(' SYSTEM %s' % node.systemId)
      if node.notationName:
         self._write(' NDATA %s' % node.notationName)
      self._write('>')

   def _visitNotation(self, node):
      """Visited from a NamedNodeMap in DocumentType"""
      self._tryIndent()
      self._write('<!NOTATION %s' % node.nodeName)
      if node.publicId:
         self._write(' PUBLIC %s' % node.publicId)
      if node.systemId:
         self._write(' SYSTEM %s' % node.systemId)
      self._write('>')

   def _visitCDATASection(self, node):
      """Writes a CDATA section; its data is written as-is."""
      self._tryIndent()
      self._write('<![CDATA[%s]]>' % (node.data))
      self._inText = 0

   def _visitComment(self, node):
      """Writes a comment; its data is written as-is."""
      self._tryIndent()
      self._write('<!--%s-->' % (node.data))
      self._inText = 0

   def _visitEntityReference(self, node):
      """Writes an entity reference and marks the output as being inside text."""
      self._write('&%s;' % node.nodeName)
      self._inText = 1

   def _visitProcessingInstruction(self, node):
      """Writes a processing instruction."""
      self._tryIndent()
      self._write('<?%s %s?>' % (node.target, node.data))
      self._inText = 0

# pylint: disable=W0613
def _encodeText(text, encoding):
   """
   Safely converts the passed-in text to a Unicode string.

   Bytes are decoded as UTF-8.  Anything else, including C{None}, is returned
   unchanged.

   @param text: Text to convert.
   @param encoding: Not used; bytes are always decoded as UTF-8.

   @return: The text as a string, or the original object if it was not bytes.
   @raise ValueError: If bytes are passed in that are not valid UTF-8.
   """
   if not isinstance(text, bytes):
      return text
   try:
      return str(text, "utf-8")
   except UnicodeError as e:
      # Chain explicitly so the decoding error is kept as __cause__
      # NOTE(review): the message says "Path" although this handles arbitrary text; wording left as-is
      raise ValueError("Path could not be safely encoded as utf-8.") from e
691
def _translateCDATAAttr(characters):
   """
   Chooses a quote character for an attribute value and escapes the value to match.

   A value containing an apostrophe is delimited with double quotes, and any
   double quotes in it become C{&quot;}.  Any other value is delimited with
   apostrophes.  Newlines are always written as the character reference
   C{&#10;}.

   @param characters: Attribute value to handle.

   @return: Tuple (escaped value, delimiter); an empty value yields an empty
            string and the apostrophe delimiter.

   @copyright: This code, prior to customization, was part of the PyXML
   codebase, and before that was part of the 4DOM suite developed by
   Fourthought, Inc.  In its original form, it was Copyright (c) 2000
   Fourthought Inc, USA; All Rights Reserved.
   """
   if not characters:
      return '', "'"
   hasApostrophe = "'" in characters
   delimiter = '"' if hasApostrophe else "'"
   if hasApostrophe:
      escaped = characters.replace('"', '&quot;')
   else:
      escaped = characters.replace("'", '&apos;')
   #FIXME: There's more to normalization
   #Convert attribute new-lines to character entity
   escaped = escaped.replace('\n', '&#10;')
   return escaped, delimiter

#Note: Unicode object only for now
def _translateCDATA(characters, encoding='UTF-8', prev_chars='', markupSafe=0):
   """
   Escapes character data so that it can be written into an XML document.

   Unless C{markupSafe} is set, C{&}, C{<} and C{]]>} are replaced with
   C{&amp;}, C{&lt;} and C{]]&gt;}, and a leading C{>} is escaped as well when
   C{prev_chars} ends with C{]]}.  After that, the control characters matched
   by the illegal-character pattern below, plus U+FFFE and U+FFFF, are
   replaced with decimal character references.

   The high-character pattern used to be the UTF-8 byte sequence for
   U+FFFE/U+FFFF written as a string pattern.  On Python 3 strings that never
   matched the real code points, and it made C{ord()} fail on the unrelated
   three-character sequence it did match.  It now matches the code points
   directly.

   @param characters: Text to escape.
   @param encoding: Encoding handed on to L{_encodeText}.
   @param prev_chars: Text written just before C{characters}, used only to
                      detect a C{]]>} sequence split across two calls.
   @param markupSafe: If true, skip escaping of markup characters.

   @return: Escaped text, or an empty string if C{characters} is empty.

   @copyright: This code, prior to customization, was part of the PyXML
   codebase, and before that was part of the 4DOM suite developed by
   Fourthought, Inc.  In its original form, it was Copyright (c) 2000
   Fourthought Inc, USA; All Rights Reserved.
   """
   CDATA_CHAR_PATTERN = re.compile('[&<]|]]>')
   CHAR_TO_ENTITY = { '&': '&amp;', '<': '&lt;', ']]>': ']]&gt;', }
   ILLEGAL_LOW_CHARS = '[\x01-\x08\x0B-\x0C\x0E-\x1F]'
   ILLEGAL_HIGH_CHARS = '[\uFFFE\uFFFF]'
   XML_ILLEGAL_CHAR_PATTERN = re.compile('%s|%s'%(ILLEGAL_LOW_CHARS, ILLEGAL_HIGH_CHARS))
   if not characters:
      return ''
   if not markupSafe:
      new_string = CDATA_CHAR_PATTERN.sub(lambda m: CHAR_TO_ENTITY[m.group()], characters)
      if prev_chars[-2:] == ']]' and characters[0] == '>':
         # The '>' completes a ']]>' begun by the previous chunk, so escape it too
         new_string = '&gt;' + new_string[1:]
   else:
      new_string = characters
   #Note: use decimal char entity rep because some browsers are broken
   # Every match is exactly one character, so ord() is always safe here
   new_string = XML_ILLEGAL_CHAR_PATTERN.sub(lambda m: '&#%i;' % ord(m.group()), new_string)
   new_string = _encodeText(new_string, encoding)
   return new_string
747