universal.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338
  1. # $Id: universal.py 9037 2022-03-05 23:31:10Z milde $
  2. # Authors: David Goodger <goodger@python.org>; Ueli Schlaepfer; Günter Milde
  3. # Maintainer: docutils-develop@lists.sourceforge.net
  4. # Copyright: This module has been placed in the public domain.
  5. """
  6. Transforms needed by most or all documents:
  7. - `Decorations`: Generate a document's header & footer.
  8. - `ExposeInternals`: Expose internal attributes.
  9. - `Messages`: Placement of system messages generated after parsing.
  10. - `FilterMessages`: Remove system messages below verbosity threshold.
  11. - `TestMessages`: Like `Messages`, used on test runs.
  12. - `StripComments`: Remove comment elements from the document tree.
  13. - `StripClassesAndElements`: Remove elements with classes
  14. in `self.document.settings.strip_elements_with_classes`
  15. and class values in `self.document.settings.strip_classes`.
  16. - `SmartQuotes`: Replace ASCII quotation marks with typographic form.
  17. """
  18. __docformat__ = 'reStructuredText'
  19. import re
  20. import time
  21. from docutils import nodes, utils
  22. from docutils.transforms import Transform
  23. from docutils.utils import smartquotes
  24. class Decorations(Transform):
  25. """
  26. Populate a document's decoration element (header, footer).
  27. """
  28. default_priority = 820
  29. def apply(self):
  30. header_nodes = self.generate_header()
  31. if header_nodes:
  32. decoration = self.document.get_decoration()
  33. header = decoration.get_header()
  34. header.extend(header_nodes)
  35. footer_nodes = self.generate_footer()
  36. if footer_nodes:
  37. decoration = self.document.get_decoration()
  38. footer = decoration.get_footer()
  39. footer.extend(footer_nodes)
  40. def generate_header(self):
  41. return None
  42. def generate_footer(self):
  43. # @@@ Text is hard-coded for now.
  44. # Should be made dynamic (language-dependent).
  45. # @@@ Use timestamp from the `SOURCE_DATE_EPOCH`_ environment variable
  46. # for the datestamp?
  47. # See https://sourceforge.net/p/docutils/patches/132/
  48. # and https://reproducible-builds.org/specs/source-date-epoch/
  49. settings = self.document.settings
  50. if (settings.generator or settings.datestamp
  51. or settings.source_link or settings.source_url):
  52. text = []
  53. if (settings.source_link and settings._source
  54. or settings.source_url):
  55. if settings.source_url:
  56. source = settings.source_url
  57. else:
  58. source = utils.relative_path(settings._destination,
  59. settings._source)
  60. text.extend([
  61. nodes.reference('', 'View document source',
  62. refuri=source),
  63. nodes.Text('.\n')])
  64. if settings.datestamp:
  65. datestamp = time.strftime(settings.datestamp, time.gmtime())
  66. text.append(nodes.Text('Generated on: ' + datestamp + '.\n'))
  67. if settings.generator:
  68. text.extend([
  69. nodes.Text('Generated by '),
  70. nodes.reference('', 'Docutils',
  71. refuri='https://docutils.sourceforge.io/'),
  72. nodes.Text(' from '),
  73. nodes.reference('', 'reStructuredText',
  74. refuri='https://docutils.sourceforge.io/'
  75. 'rst.html'),
  76. nodes.Text(' source.\n')])
  77. return [nodes.paragraph('', '', *text)]
  78. else:
  79. return None
  80. class ExposeInternals(Transform):
  81. """
  82. Expose internal attributes if ``expose_internals`` setting is set.
  83. """
  84. default_priority = 840
  85. def not_Text(self, node):
  86. return not isinstance(node, nodes.Text)
  87. def apply(self):
  88. if self.document.settings.expose_internals:
  89. for node in self.document.findall(self.not_Text):
  90. for att in self.document.settings.expose_internals:
  91. value = getattr(node, att, None)
  92. if value is not None:
  93. node['internal:' + att] = value
  94. class Messages(Transform):
  95. """
  96. Place any system messages generated after parsing into a dedicated section
  97. of the document.
  98. """
  99. default_priority = 860
  100. def apply(self):
  101. unfiltered = self.document.transform_messages
  102. messages = [msg for msg in unfiltered if not msg.parent]
  103. if messages:
  104. section = nodes.section(classes=['system-messages'])
  105. # @@@ get this from the language module?
  106. section += nodes.title('', 'Docutils System Messages')
  107. section += messages
  108. self.document.transform_messages[:] = []
  109. self.document += section
  110. # TODO: fix bug #435:
  111. # Messages are filtered at a very late stage
  112. # This breaks the link from inline error messages to the corresponding
  113. # system message at the end of document.
  114. class FilterMessages(Transform):
  115. """
  116. Remove system messages below verbosity threshold.
  117. Convert <problematic> nodes referencing removed messages to <Text>.
  118. Remove "System Messages" section if empty.
  119. """
  120. default_priority = 870
  121. def apply(self):
  122. for node in tuple(self.document.findall(nodes.system_message)):
  123. if node['level'] < self.document.reporter.report_level:
  124. node.parent.remove(node)
  125. try: # also remove id-entry
  126. del(self.document.ids[node['ids'][0]])
  127. except (IndexError):
  128. pass
  129. for node in tuple(self.document.findall(nodes.problematic)):
  130. if node['refid'] not in self.document.ids:
  131. node.parent.replace(node, nodes.Text(node.astext()))
  132. for node in self.document.findall(nodes.section):
  133. if "system-messages" in node['classes'] and len(node) == 1:
  134. node.parent.remove(node)
  135. class TestMessages(Transform):
  136. """
  137. Append all post-parse system messages to the end of the document.
  138. Used for testing purposes.
  139. """
  140. default_priority = 880
  141. def apply(self):
  142. for msg in self.document.transform_messages:
  143. if not msg.parent:
  144. self.document += msg
  145. class StripComments(Transform):
  146. """
  147. Remove comment elements from the document tree (only if the
  148. ``strip_comments`` setting is enabled).
  149. """
  150. default_priority = 740
  151. def apply(self):
  152. if self.document.settings.strip_comments:
  153. for node in tuple(self.document.findall(nodes.comment)):
  154. node.parent.remove(node)
  155. class StripClassesAndElements(Transform):
  156. """
  157. Remove from the document tree all elements with classes in
  158. `self.document.settings.strip_elements_with_classes` and all "classes"
  159. attribute values in `self.document.settings.strip_classes`.
  160. """
  161. default_priority = 420
  162. def apply(self):
  163. if self.document.settings.strip_elements_with_classes:
  164. self.strip_elements = {*self.document.settings
  165. .strip_elements_with_classes}
  166. # Iterate over a tuple as removing the current node
  167. # corrupts the iterator returned by `iter`:
  168. for node in tuple(self.document.findall(self.check_classes)):
  169. node.parent.remove(node)
  170. if not self.document.settings.strip_classes:
  171. return
  172. strip_classes = self.document.settings.strip_classes
  173. for node in self.document.findall(nodes.Element):
  174. for class_value in strip_classes:
  175. try:
  176. node['classes'].remove(class_value)
  177. except ValueError:
  178. pass
  179. def check_classes(self, node):
  180. if not isinstance(node, nodes.Element):
  181. return False
  182. for class_value in node['classes'][:]:
  183. if class_value in self.strip_elements:
  184. return True
  185. return False
  186. class SmartQuotes(Transform):
  187. """
  188. Replace ASCII quotation marks with typographic form.
  189. Also replace multiple dashes with em-dash/en-dash characters.
  190. """
  191. default_priority = 855
  192. nodes_to_skip = (nodes.FixedTextElement, nodes.Special)
  193. """Do not apply "smartquotes" to instances of these block-level nodes."""
  194. literal_nodes = (nodes.FixedTextElement, nodes.Special,
  195. nodes.image, nodes.literal, nodes.math,
  196. nodes.raw, nodes.problematic)
  197. """Do not apply smartquotes to instances of these inline nodes."""
  198. smartquotes_action = 'qDe'
  199. """Setting to select smartquote transformations.
  200. The default 'qDe' educates normal quote characters: (", '),
  201. em- and en-dashes (---, --) and ellipses (...).
  202. """
  203. def __init__(self, document, startnode):
  204. Transform.__init__(self, document, startnode=startnode)
  205. self.unsupported_languages = set()
  206. def get_tokens(self, txtnodes):
  207. # A generator that yields ``(texttype, nodetext)`` tuples for a list
  208. # of "Text" nodes (interface to ``smartquotes.educate_tokens()``).
  209. for node in txtnodes:
  210. if (isinstance(node.parent, self.literal_nodes)
  211. or isinstance(node.parent.parent, self.literal_nodes)):
  212. yield 'literal', str(node)
  213. else:
  214. # SmartQuotes uses backslash escapes instead of null-escapes
  215. # Insert backslashes before escaped "active" characters.
  216. txt = re.sub('(?<=\x00)([-\\\'".`])', r'\\\1', str(node))
  217. yield 'plain', txt
  218. def apply(self):
  219. smart_quotes = self.document.settings.setdefault('smart_quotes',
  220. False)
  221. if not smart_quotes:
  222. return
  223. try:
  224. alternative = smart_quotes.startswith('alt')
  225. except AttributeError:
  226. alternative = False
  227. document_language = self.document.settings.language_code
  228. lc_smartquotes = self.document.settings.smartquotes_locales
  229. if lc_smartquotes:
  230. smartquotes.smartchars.quotes.update(dict(lc_smartquotes))
  231. # "Educate" quotes in normal text. Handle each block of text
  232. # (TextElement node) as a unit to keep context around inline nodes:
  233. for node in self.document.findall(nodes.TextElement):
  234. # skip preformatted text blocks and special elements:
  235. if isinstance(node, self.nodes_to_skip):
  236. continue
  237. # nested TextElements are not "block-level" elements:
  238. if isinstance(node.parent, nodes.TextElement):
  239. continue
  240. # list of text nodes in the "text block":
  241. txtnodes = [txtnode for txtnode in node.findall(nodes.Text)
  242. if not isinstance(txtnode.parent,
  243. nodes.option_string)]
  244. # language: use typographical quotes for language "lang"
  245. lang = node.get_language_code(document_language)
  246. # use alternative form if `smart-quotes` setting starts with "alt":
  247. if alternative:
  248. if '-x-altquot' in lang:
  249. lang = lang.replace('-x-altquot', '')
  250. else:
  251. lang += '-x-altquot'
  252. # drop unsupported subtags:
  253. for tag in utils.normalize_language_tag(lang):
  254. if tag in smartquotes.smartchars.quotes:
  255. lang = tag
  256. break
  257. else: # language not supported -- keep ASCII quotes
  258. if lang not in self.unsupported_languages:
  259. self.document.reporter.warning(
  260. 'No smart quotes defined for language "%s".' % lang,
  261. base_node=node)
  262. self.unsupported_languages.add(lang)
  263. lang = ''
  264. # Iterator educating quotes in plain text:
  265. # (see "utils/smartquotes.py" for the attribute setting)
  266. teacher = smartquotes.educate_tokens(
  267. self.get_tokens(txtnodes),
  268. attr=self.smartquotes_action, language=lang)
  269. for txtnode, newtext in zip(txtnodes, teacher):
  270. txtnode.parent.replace(txtnode, nodes.Text(newtext))
  271. self.unsupported_languages.clear()