1
0

ElementTree.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. # defusedxml
  2. #
  3. # Copyright (c) 2013 by Christian Heimes <christian@python.org>
  4. # Licensed to PSF under a Contributor Agreement.
  5. # See https://www.python.org/psf/license for licensing details.
  6. """Defused xml.etree.ElementTree facade
  7. """
  8. from __future__ import print_function, absolute_import
  9. import sys
  10. import warnings
  11. from xml.etree.ElementTree import ParseError
  12. from xml.etree.ElementTree import TreeBuilder as _TreeBuilder
  13. from xml.etree.ElementTree import parse as _parse
  14. from xml.etree.ElementTree import tostring
  15. from .common import PY3
  16. if PY3:
  17. import importlib
  18. else:
  19. from xml.etree.ElementTree import XMLParser as _XMLParser
  20. from xml.etree.ElementTree import iterparse as _iterparse
  21. from .common import (
  22. DTDForbidden,
  23. EntitiesForbidden,
  24. ExternalReferenceForbidden,
  25. _generate_etree_functions,
  26. )
  27. __origin__ = "xml.etree.ElementTree"
  28. def _get_py3_cls():
  29. """Python 3.3 hides the pure Python code but defusedxml requires it.
  30. The code is based on test.support.import_fresh_module().
  31. """
  32. pymodname = "xml.etree.ElementTree"
  33. cmodname = "_elementtree"
  34. pymod = sys.modules.pop(pymodname, None)
  35. cmod = sys.modules.pop(cmodname, None)
  36. sys.modules[cmodname] = None
  37. try:
  38. pure_pymod = importlib.import_module(pymodname)
  39. finally:
  40. # restore module
  41. sys.modules[pymodname] = pymod
  42. if cmod is not None:
  43. sys.modules[cmodname] = cmod
  44. else:
  45. sys.modules.pop(cmodname, None)
  46. # restore attribute on original package
  47. etree_pkg = sys.modules["xml.etree"]
  48. if pymod is not None:
  49. etree_pkg.ElementTree = pymod
  50. elif hasattr(etree_pkg, "ElementTree"):
  51. del etree_pkg.ElementTree
  52. _XMLParser = pure_pymod.XMLParser
  53. _iterparse = pure_pymod.iterparse
  54. # patch pure module to use ParseError from C extension
  55. pure_pymod.ParseError = ParseError
  56. return _XMLParser, _iterparse
  57. if PY3:
  58. _XMLParser, _iterparse = _get_py3_cls()
  59. _sentinel = object()
  60. class DefusedXMLParser(_XMLParser):
  61. def __init__(
  62. self,
  63. html=_sentinel,
  64. target=None,
  65. encoding=None,
  66. forbid_dtd=False,
  67. forbid_entities=True,
  68. forbid_external=True,
  69. ):
  70. # Python 2.x old style class
  71. _XMLParser.__init__(self, target=target, encoding=encoding)
  72. if html is not _sentinel:
  73. # the 'html' argument has been deprecated and ignored in all
  74. # supported versions of Python. Python 3.8 finally removed it.
  75. if html:
  76. raise TypeError("'html=True' is no longer supported.")
  77. else:
  78. warnings.warn(
  79. "'html' keyword argument is no longer supported. Pass "
  80. "in arguments as keyword arguments.",
  81. category=DeprecationWarning,
  82. )
  83. self.forbid_dtd = forbid_dtd
  84. self.forbid_entities = forbid_entities
  85. self.forbid_external = forbid_external
  86. if PY3:
  87. parser = self.parser
  88. else:
  89. parser = self._parser
  90. if self.forbid_dtd:
  91. parser.StartDoctypeDeclHandler = self.defused_start_doctype_decl
  92. if self.forbid_entities:
  93. parser.EntityDeclHandler = self.defused_entity_decl
  94. parser.UnparsedEntityDeclHandler = self.defused_unparsed_entity_decl
  95. if self.forbid_external:
  96. parser.ExternalEntityRefHandler = self.defused_external_entity_ref_handler
  97. def defused_start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
  98. raise DTDForbidden(name, sysid, pubid)
  99. def defused_entity_decl(
  100. self, name, is_parameter_entity, value, base, sysid, pubid, notation_name
  101. ):
  102. raise EntitiesForbidden(name, value, base, sysid, pubid, notation_name)
  103. def defused_unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
  104. # expat 1.2
  105. raise EntitiesForbidden(name, None, base, sysid, pubid, notation_name) # pragma: no cover
  106. def defused_external_entity_ref_handler(self, context, base, sysid, pubid):
  107. raise ExternalReferenceForbidden(context, base, sysid, pubid)
  108. # aliases
  109. # XMLParse is a typo, keep it for backwards compatibility
  110. XMLTreeBuilder = XMLParse = XMLParser = DefusedXMLParser
  111. parse, iterparse, fromstring = _generate_etree_functions(
  112. DefusedXMLParser, _TreeBuilder, _parse, _iterparse
  113. )
  114. XML = fromstring
  115. __all__ = [
  116. "ParseError",
  117. "XML",
  118. "XMLParse",
  119. "XMLParser",
  120. "XMLTreeBuilder",
  121. "fromstring",
  122. "iterparse",
  123. "parse",
  124. "tostring",
  125. ]