123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338 |
- # $Id: universal.py 9037 2022-03-05 23:31:10Z milde $
- # Authors: David Goodger <goodger@python.org>; Ueli Schlaepfer; Günter Milde
- # Maintainer: docutils-develop@lists.sourceforge.net
- # Copyright: This module has been placed in the public domain.
- """
- Transforms needed by most or all documents:
- - `Decorations`: Generate a document's header & footer.
- - `ExposeInternals`: Expose internal attributes.
- - `Messages`: Placement of system messages generated after parsing.
- - `FilterMessages`: Remove system messages below verbosity threshold.
- - `TestMessages`: Like `Messages`, used on test runs.
- - `StripComments`: Remove comment elements from the document tree.
- - `StripClassesAndElements`: Remove elements with classes
- in `self.document.settings.strip_elements_with_classes`
- and class values in `self.document.settings.strip_classes`.
- - `SmartQuotes`: Replace ASCII quotation marks with typographic form.
- """
- __docformat__ = 'reStructuredText'
- import re
- import time
- from docutils import nodes, utils
- from docutils.transforms import Transform
- from docutils.utils import smartquotes
class Decorations(Transform):

    """
    Fill the document's decoration element with header and footer content.

    `generate_header()` and `generate_footer()` each return a list of nodes
    or None; a non-empty result is appended to the matching decoration child.
    """

    default_priority = 820

    def apply(self):
        """Generate the header, then the footer, and attach what is found."""
        header_nodes = self.generate_header()
        if header_nodes:
            self.document.get_decoration().get_header().extend(header_nodes)
        footer_nodes = self.generate_footer()
        if footer_nodes:
            self.document.get_decoration().get_footer().extend(footer_nodes)

    def generate_header(self):
        """Return the header nodes; this implementation provides none."""
        return None

    def generate_footer(self):
        """
        Return a one-element list holding the footer paragraph, or None.

        The paragraph is assembled from up to three parts, selected by the
        `source_link`/`source_url`, `datestamp`, and `generator` settings.
        None is returned when none of these settings is active.
        """
        # @@@ Text is hard-coded for now.
        # Should be made dynamic (language-dependent).
        # @@@ Use timestamp from the `SOURCE_DATE_EPOCH`_ environment variable
        # for the datestamp?
        # See https://sourceforge.net/p/docutils/patches/132/
        # and https://reproducible-builds.org/specs/source-date-epoch/
        settings = self.document.settings
        if not (settings.generator or settings.datestamp
                or settings.source_link or settings.source_url):
            return None

        parts = []

        # Link to the document source (an explicit URL takes precedence
        # over the path computed relative to the destination):
        if ((settings.source_link and settings._source)
                or settings.source_url):
            source = (settings.source_url if settings.source_url
                      else utils.relative_path(settings._destination,
                                               settings._source))
            parts.append(nodes.reference('', 'View document source',
                                         refuri=source))
            parts.append(nodes.Text('.\n'))

        # Generation time (UTC), formatted with the `datestamp` setting:
        if settings.datestamp:
            datestamp = time.strftime(settings.datestamp, time.gmtime())
            parts.append(nodes.Text('Generated on: ' + datestamp + '.\n'))

        # Credit line with links to Docutils and reStructuredText:
        if settings.generator:
            parts.append(nodes.Text('Generated by '))
            parts.append(nodes.reference(
                '', 'Docutils',
                refuri='https://docutils.sourceforge.io/'))
            parts.append(nodes.Text(' from '))
            parts.append(nodes.reference(
                '', 'reStructuredText',
                refuri='https://docutils.sourceforge.io/'
                       'rst.html'))
            parts.append(nodes.Text(' source.\n'))

        return [nodes.paragraph('', '', *parts)]
class ExposeInternals(Transform):

    """
    Copy selected internal (Python) attributes of nodes into node attributes
    named ``internal:<name>``, if the ``expose_internals`` setting is set.
    """

    default_priority = 840

    def not_Text(self, node):
        """Return True for every node that is not a `nodes.Text` instance."""
        return not isinstance(node, nodes.Text)

    def apply(self):
        """Expose each listed attribute on all non-Text nodes that have it."""
        attribute_names = self.document.settings.expose_internals
        if not attribute_names:
            return
        for node in self.document.findall(self.not_Text):
            for name in attribute_names:
                value = getattr(node, name, None)
                # Attributes that are missing or None are not exposed.
                if value is None:
                    continue
                node['internal:' + name] = value
class Messages(Transform):

    """
    Collect the system messages generated after parsing that are not yet
    attached to the document and append them in a dedicated section.
    """

    default_priority = 860

    def apply(self):
        """Move parentless transform messages into a new trailing section."""
        pending = self.document.transform_messages
        loose = [msg for msg in pending if not msg.parent]
        if not loose:
            return
        section = nodes.section(classes=['system-messages'])
        # @@@ get this from the language module?
        section += nodes.title('', 'Docutils System Messages')
        section += loose
        # Empty the list in place so that other references see it cleared.
        pending[:] = []
        self.document += section
- # TODO: fix bug #435:
- # Messages are filtered at a very late stage
- # This breaks the link from inline error messages to the corresponding
- # system message at the end of document.
class FilterMessages(Transform):

    """
    Remove system messages below verbosity threshold.

    Convert <problematic> nodes referencing removed messages to <Text>.
    Remove "System Messages" section if empty.
    """

    default_priority = 870

    def apply(self):
        """
        Filter the document tree in three passes.

        Every pass iterates over a snapshot (tuple) of the matching nodes,
        because removing or replacing nodes while the tree is being
        traversed can make the traversal skip nodes.
        """
        threshold = self.document.reporter.report_level

        # 1. Drop system messages below the report level.
        for node in tuple(self.document.findall(nodes.system_message)):
            if node['level'] < threshold:
                node.parent.remove(node)
                try:  # also remove id-entry
                    del self.document.ids[node['ids'][0]]
                except IndexError:
                    # the message has no id, so there is nothing to drop
                    pass

        # 2. Replace <problematic> nodes whose target id is no longer
        #    registered in the document with their plain text.
        for node in tuple(self.document.findall(nodes.problematic)):
            if node['refid'] not in self.document.ids:
                node.parent.replace(node, nodes.Text(node.astext()))

        # 3. Remove "system-messages" sections with a single child left.
        for node in tuple(self.document.findall(nodes.section)):
            if "system-messages" in node['classes'] and len(node) == 1:
                node.parent.remove(node)
class TestMessages(Transform):

    """
    Append every post-parse system message that has no parent yet to the
    end of the document.

    Used for testing purposes.
    """

    default_priority = 880

    def apply(self):
        """Attach each still-parentless transform message to the document."""
        for message in self.document.transform_messages:
            # The parent is checked at iteration time, so a message that
            # was attached earlier in this loop is not appended twice.
            if message.parent:
                continue
            self.document += message
class StripComments(Transform):

    """
    Delete all comment elements from the document tree.

    Does nothing unless the ``strip_comments`` setting is enabled.
    """

    default_priority = 740

    def apply(self):
        """Remove every `nodes.comment` element if the setting is on."""
        if not self.document.settings.strip_comments:
            return
        # Take a snapshot first: the tree is modified inside the loop.
        comments = tuple(self.document.findall(nodes.comment))
        for comment in comments:
            comment.parent.remove(comment)
class StripClassesAndElements(Transform):

    """
    Remove from the document tree every element carrying a class listed in
    `self.document.settings.strip_elements_with_classes`, and remove all
    "classes" attribute values listed in
    `self.document.settings.strip_classes`.
    """

    default_priority = 420

    def apply(self):
        """Strip elements first, then strip class values from the rest."""
        settings = self.document.settings

        if settings.strip_elements_with_classes:
            self.strip_elements = set(settings.strip_elements_with_classes)
            # Iterate over a tuple as removing the current node
            # corrupts the iterator returned by `iter`:
            doomed = tuple(self.document.findall(self.check_classes))
            for element in doomed:
                element.parent.remove(element)

        unwanted = settings.strip_classes
        if not unwanted:
            return
        for element in self.document.findall(nodes.Element):
            classes = element['classes']
            for class_value in unwanted:
                # Only the first occurrence of a value is removed.
                if class_value in classes:
                    classes.remove(class_value)

    def check_classes(self, node):
        """
        Return True if `node` is an Element with a class to be stripped.

        Relies on `self.strip_elements`, which `apply()` sets before use.
        """
        if not isinstance(node, nodes.Element):
            return False
        return any(class_value in self.strip_elements
                   for class_value in node['classes'])
class SmartQuotes(Transform):

    """
    Convert ASCII quotation marks to their typographic counterparts.

    Runs of dashes are converted to em-dash/en-dash characters as well.
    """

    default_priority = 855

    nodes_to_skip = (nodes.FixedTextElement, nodes.Special)
    """Block-level node classes that are never processed by "smartquotes"."""

    literal_nodes = (nodes.FixedTextElement, nodes.Special,
                     nodes.image, nodes.literal, nodes.math,
                     nodes.raw, nodes.problematic)
    """Inline node classes whose text is passed on as 'literal' tokens."""

    smartquotes_action = 'qDe'
    """Setting to select smartquote transformations.

    The default 'qDe' educates normal quote characters: (", '),
    em- and en-dashes (---, --) and ellipses (...).
    """

    def __init__(self, document, startnode):
        Transform.__init__(self, document, startnode=startnode)
        # Languages that were already reported as unsupported.
        self.unsupported_languages = set()

    def get_tokens(self, txtnodes):
        """
        Yield a ``(texttype, nodetext)`` tuple for each node in `txtnodes`.

        This is the interface to ``smartquotes.educate_tokens()``.
        The type is 'literal' if the parent or grandparent of the Text
        node is an instance of `self.literal_nodes`, else 'plain'.
        """
        literal_classes = self.literal_nodes
        for textnode in txtnodes:
            parent = textnode.parent
            if (isinstance(parent, literal_classes)
                    or isinstance(parent.parent, literal_classes)):
                yield 'literal', str(textnode)
                continue
            # SmartQuotes uses backslash escapes instead of null-escapes
            # Insert backslashes before escaped "active" characters.
            escaped = re.sub('(?<=\x00)([-\\\'".`])', r'\\\1', str(textnode))
            yield 'plain', escaped

    def apply(self):
        """
        Educate quotes in all text blocks if `smart_quotes` is enabled.

        Each top-level TextElement is handled as one unit, so that the
        context around inline nodes is kept. A warning is reported once
        per run for every language without smart quote definitions.
        """
        settings = self.document.settings
        smart_quotes = settings.setdefault('smart_quotes', False)
        if not smart_quotes:
            return
        try:
            alternative = smart_quotes.startswith('alt')
        except AttributeError:
            # the setting value is not a string (e.g. True)
            alternative = False

        document_language = settings.language_code
        lc_smartquotes = settings.smartquotes_locales
        if lc_smartquotes:
            smartquotes.smartchars.quotes.update(dict(lc_smartquotes))

        # "Educate" quotes in normal text. Handle each block of text
        # (TextElement node) as a unit to keep context around inline nodes:
        for block in self.document.findall(nodes.TextElement):
            # Skip preformatted text blocks and special elements, and
            # nested TextElements (these are not "block-level" elements):
            if (isinstance(block, self.nodes_to_skip)
                    or isinstance(block.parent, nodes.TextElement)):
                continue

            # list of text nodes in the "text block":
            txtnodes = [textnode for textnode in block.findall(nodes.Text)
                        if not isinstance(textnode.parent,
                                          nodes.option_string)]

            # language: use typographical quotes for language "lang"
            lang = block.get_language_code(document_language)
            # use alternative form if `smart-quotes` setting starts
            # with "alt" (toggle the "-x-altquot" subtag):
            if alternative:
                lang = (lang.replace('-x-altquot', '')
                        if '-x-altquot' in lang
                        else lang + '-x-altquot')

            # drop unsupported subtags: take the first normalized tag
            # for which quotes are defined
            supported = next(
                (tag for tag in utils.normalize_language_tag(lang)
                 if tag in smartquotes.smartchars.quotes),
                None)
            if supported is None:
                # language not supported -- keep ASCII quotes
                if lang not in self.unsupported_languages:
                    self.document.reporter.warning(
                        'No smart quotes defined for language "%s".' % lang,
                        base_node=block)
                self.unsupported_languages.add(lang)
                lang = ''
            else:
                lang = supported

            # Iterator educating quotes in plain text:
            # (see "utils/smartquotes.py" for the attribute setting)
            teacher = smartquotes.educate_tokens(
                self.get_tokens(txtnodes),
                attr=self.smartquotes_action, language=lang)

            for textnode, newtext in zip(txtnodes, teacher):
                textnode.parent.replace(textnode, nodes.Text(newtext))

        self.unsupported_languages.clear()
|