nodes.py 79 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314
  1. # $Id: nodes.py 9067 2022-06-10 11:08:46Z milde $
  2. # Author: David Goodger <goodger@python.org>
  3. # Maintainer: docutils-develop@lists.sourceforge.net
  4. # Copyright: This module has been placed in the public domain.
  5. """
  6. Docutils document tree element class library.
  7. Classes in CamelCase are abstract base classes or auxiliary classes. The one
  8. exception is `Text`, for a text (PCDATA) node; uppercase is used to
  9. differentiate from element classes. Classes in lower_case_with_underscores
  10. are element classes, matching the XML element generic identifiers in the DTD_.
  11. The position of each node (the level at which it can occur) is significant and
  12. is represented by abstract base classes (`Root`, `Structural`, `Body`,
  13. `Inline`, etc.). Certain transformations will be easier because we can use
  14. ``isinstance(node, base_class)`` to determine the position of the node in the
  15. hierarchy.
  16. .. _DTD: https://docutils.sourceforge.io/docs/ref/docutils.dtd
  17. """
  18. __docformat__ = 'reStructuredText'
  19. from collections import Counter
  20. import re
  21. import sys
  22. import warnings
  23. import unicodedata
  24. # import xml.dom.minidom as dom # -> conditional import in Node.asdom()
  25. # and document.asdom()
  26. # import docutils.transforms # -> conditional import in document.__init__()
  27. # ==============================
  28. # Functional Node Base Classes
  29. # ==============================
  class Node:
      """Abstract base class of nodes in a document tree."""

      parent = None
      """Back-reference to the Node immediately containing this Node."""

      source = None
      """Path or description of the input source which generated this Node."""

      line = None
      """The line number (1-based) of the beginning of this Node in `source`."""

      _document = None

      @property
      def document(self):
          """Return the `document` root node of the tree containing this Node.

          The node's own `_document` is used if set, else the parent's
          `document`.  Return None if neither is available.
          """
          try:
              return self._document or self.parent.document
          except AttributeError:
              return None

      @document.setter
      def document(self, value):
          self._document = value

      def __bool__(self):
          """
          Node instances are always true, even if they're empty.  A node is
          more than a simple container.  Its boolean "truth" does not depend
          on having one or more subnodes in the doctree.

          Use `len()` to check node length.
          """
          return True

      def asdom(self, dom=None):
          """Return a DOM **fragment** representation of this Node.

          Parameter `dom`: a module providing ``Document()``; defaults to
          `xml.dom.minidom` (imported on demand).
          """
          if dom is None:
              import xml.dom.minidom as dom
          domroot = dom.Document()
          return self._dom_node(domroot)

      def pformat(self, indent=' ', level=0):
          """
          Return an indented pseudo-XML representation, for test purposes.

          Override in subclasses.
          """
          raise NotImplementedError

      def copy(self):
          """Return a copy of self."""
          raise NotImplementedError

      def deepcopy(self):
          """Return a deep copy of self (also copying children)."""
          raise NotImplementedError

      def astext(self):
          """Return a string representation of this Node."""
          raise NotImplementedError

      def setup_child(self, child):
          """Make `self` the parent of `child` and pass on document info.

          If this node belongs to a document, `child.document` is set and
          `child.source` / `child.line` are filled in from the document's
          current source and line when they are still None.
          """
          child.parent = self
          if self.document:
              child.document = self.document
              if child.source is None:
                  child.source = self.document.current_source
              if child.line is None:
                  child.line = self.document.current_line

      def walk(self, visitor):
          """
          Traverse a tree of `Node` objects, calling the
          `dispatch_visit()` method of `visitor` when entering each
          node.  (The `walkabout()` method is similar, except it also
          calls the `dispatch_departure()` method before exiting each
          node.)

          This tree traversal supports limited in-place tree
          modifications.  Replacing one node with one or more nodes is
          OK, as is removing an element.  However, if the node removed
          or replaced occurs after the current node, the old node will
          still be traversed, and any new nodes will not.

          Within ``visit`` methods (and ``depart`` methods for
          `walkabout()`), `TreePruningException` subclasses may be raised
          (`SkipChildren`, `SkipSiblings`, `SkipNode`, `SkipDeparture`).

          Parameter `visitor`: A `NodeVisitor` object, containing a
          ``visit`` implementation for each `Node` subclass encountered.

          Return true if we should stop the traversal.
          """
          stop = False
          visitor.document.reporter.debug(
              'docutils.nodes.Node.walk calling dispatch_visit for %s'
              % self.__class__.__name__)
          try:
              try:
                  visitor.dispatch_visit(self)
              except (SkipChildren, SkipNode):
                  return stop
              except SkipDeparture:  # not applicable; ignore
                  pass
              children = self.children
              try:
                  # iterate over a copy: visitors may modify the child list
                  for child in children[:]:
                      if child.walk(visitor):
                          stop = True
                          break
              except SkipSiblings:
                  pass
          except StopTraversal:
              stop = True
          return stop

      def walkabout(self, visitor):
          """
          Perform a tree traversal similarly to `Node.walk()` (which
          see), except also call the `dispatch_departure()` method
          before exiting each node.

          Parameter `visitor`: A `NodeVisitor` object, containing a
          ``visit`` and ``depart`` implementation for each `Node`
          subclass encountered.

          Return true if we should stop the traversal.
          """
          call_depart = True
          stop = False
          visitor.document.reporter.debug(
              'docutils.nodes.Node.walkabout calling dispatch_visit for %s'
              % self.__class__.__name__)
          try:
              try:
                  visitor.dispatch_visit(self)
              except SkipNode:
                  return stop
              except SkipDeparture:
                  call_depart = False
              children = self.children
              try:
                  # iterate over a copy: visitors may modify the child list
                  for child in children[:]:
                      if child.walkabout(visitor):
                          stop = True
                          break
              except SkipSiblings:
                  pass
          except SkipChildren:
              pass
          except StopTraversal:
              stop = True
          if call_depart:
              visitor.document.reporter.debug(
                  'docutils.nodes.Node.walkabout calling dispatch_departure '
                  'for %s' % self.__class__.__name__)
              visitor.dispatch_departure(self)
          return stop

      def _fast_findall(self, cls):
          """Return iterator that only supports instance checks."""
          if isinstance(self, cls):
              yield self
          for child in self.children:
              yield from child._fast_findall(cls)

      def _superfast_findall(self):
          """Return iterator that doesn't check for a condition."""
          # This is different from ``iter(self)`` implemented via
          # __getitem__() and __len__() in the Element subclass,
          # which yields only the direct children.
          yield self
          for child in self.children:
              yield from child._superfast_findall()

      def traverse(self, condition=None, include_self=True, descend=True,
                   siblings=False, ascend=False):
          """Return list of nodes following `self`.

          For looping, Node.findall() is faster and more memory efficient.
          """
          # traverse() may be eventually removed:
          warnings.warn('nodes.Node.traverse() is obsoleted by Node.findall().',
                        PendingDeprecationWarning, stacklevel=2)
          return list(self.findall(condition, include_self, descend,
                                   siblings, ascend))

      def findall(self, condition=None, include_self=True, descend=True,
                  siblings=False, ascend=False):
          """
          Return an iterator yielding nodes following `self`:

          * self (if `include_self` is true)
          * all descendants in tree traversal order (if `descend` is true)
          * the following siblings (if `siblings` is true) and their
            descendants (if also `descend` is true)
          * the following siblings of the parent (if `ascend` is true) and
            their descendants (if also `descend` is true), and so on.

          If `condition` is not None, the iterator yields only nodes
          for which ``condition(node)`` is true.  If `condition` is a
          node class ``cls``, it is equivalent to a function consisting
          of ``return isinstance(node, cls)``.

          If `ascend` is true, assume `siblings` to be true as well.

          If the tree structure is modified during iteration, the result
          is undefined.

          For example, given the following tree::

              <paragraph>
                  <emphasis>      <--- emphasis.findall() and
                      <strong>    <--- strong.findall() are called.
                          Foo
                      Bar
                  <reference name="Baz" refid="baz">
                      Baz

          Then tuple(emphasis.findall()) equals ::

              (<emphasis>, <strong>, <#text: Foo>, <#text: Bar>)

          and list(strong.findall(ascend=True)) equals ::

              [<strong>, <#text: Foo>, <#text: Bar>, <reference>, <#text: Baz>]
          """
          if ascend:
              siblings = True
          # Check for special argument combinations that allow using an
          # optimized version of findall()
          if include_self and descend and not siblings:
              if condition is None:
                  yield from self._superfast_findall()
                  return
              elif isinstance(condition, type):
                  yield from self._fast_findall(condition)
                  return
          # Check if `condition` is a class (check for TypeType for Python
          # implementations that use only new-style classes, like PyPy).
          if isinstance(condition, type):
              node_class = condition

              def condition(node, node_class=node_class):
                  return isinstance(node, node_class)

          if include_self and (condition is None or condition(self)):
              yield self
          if descend and len(self.children):
              for child in self:
                  yield from child.findall(condition=condition,
                                           include_self=True, descend=True,
                                           siblings=False, ascend=False)
          if siblings or ascend:
              node = self
              while node.parent:
                  index = node.parent.index(node)
                  # extra check since Text nodes have value-equality
                  while node.parent[index] is not node:
                      index = node.parent.index(node, index + 1)
                  for sibling in node.parent[index+1:]:
                      yield from sibling.findall(
                          condition=condition,
                          include_self=True, descend=descend,
                          siblings=False, ascend=False)
                  if not ascend:
                      break
                  else:
                      node = node.parent

      def next_node(self, condition=None, include_self=False, descend=True,
                    siblings=False, ascend=False):
          """
          Return the first node in the iterator returned by findall(),
          or None if the iterable is empty.

          Parameter list is the same as of `findall()`.  Note that
          `include_self` defaults to False, though.
          """
          try:
              return next(self.findall(condition, include_self,
                                       descend, siblings, ascend))
          except StopIteration:
              return None

      def previous_sibling(self):
          """Return preceding sibling node or ``None``.

          ``None`` is returned for a node without parent and for the
          first child of its parent.
          """
          try:
              index = self.parent.index(self)
              # `index()` compares by value and Text nodes have
              # value-equality: advance until we find `self` itself.
              while self.parent[index] is not self:
                  index = self.parent.index(self, index + 1)
          except (AttributeError, IndexError):
              return None
          # Index 0 must not be decremented: ``parent[-1]`` would wrap
          # around and wrongly return the *last* child.
          if index == 0:
              return None
          return self.parent[index - 1]
  class reprunicode(str):
      """
      Deprecated backwards compatibility stub.  Use the standard `str` instead.

      Instantiating it emits a `DeprecationWarning`.
      """

      def __init__(self, s):
          notice = ('nodes.reprunicode() is not required with Python 3'
                    ' and will be removed in Docutils 0.21 or later.')
          warnings.warn(notice, DeprecationWarning, stacklevel=2)
          super().__init__()
  def ensure_str(s):
      """
      Deprecated backwards compatibility stub returning `s`.

      Emits a `DeprecationWarning` and hands `s` back unchanged.
      """
      notice = ('nodes.ensure_str() is not required with Python 3'
                ' and will be removed in Docutils 0.21 or later.')
      warnings.warn(notice, DeprecationWarning, stacklevel=2)
      return s
  298. # definition moved here from `utils` to avoid circular import dependency
  def unescape(text, restore_backslashes=False, respect_whitespace=False):
      """
      Return a string with nulls removed or restored to backslashes.

      With `restore_backslashes` true, every null character becomes a
      backslash.  Otherwise nulls are dropped, together with a space or
      newline directly following a null (backslash-escaped whitespace).
      """
      # `respect_whitespace` is ignored (since introduction 2016-12-16)
      if restore_backslashes:
          return text.replace('\x00', '\\')
      # Order matters: strip "null + whitespace" pairs before lone nulls.
      for escaped in ('\x00 ', '\x00\n', '\x00'):
          text = text.replace(escaped, '')
      return text
  class Text(Node, str):
      """
      Instances are terminal nodes (leaves) containing text only; no child
      nodes or attributes.  Initialize by passing a string to the constructor.

      Access the raw (null-escaped) text with ``str(<instance>)``
      and unescaped text with ``<instance>.astext()``.
      """

      tagname = '#text'

      children = ()
      """Text nodes have no children, and cannot have children."""

      def __new__(cls, data, rawsource=None):
          """Assert that `data` is not an array of bytes
          and warn if the deprecated `rawsource` argument is used.
          """
          if isinstance(data, bytes):
              raise TypeError('expecting str data, not bytes')
          if rawsource is not None:
              warnings.warn('nodes.Text: initialization argument "rawsource" '
                            'is ignored and will be removed in Docutils 2.0.',
                            DeprecationWarning, stacklevel=2)
          return str.__new__(cls, data)

      def shortrepr(self, maxlen=18):
          """Return ``<#text: '...'>``, abbreviating text longer than `maxlen`."""
          if len(self) > maxlen:
              shown = self[:maxlen-4] + ' ...'
          else:
              shown = self
          return '<%s: %r>' % (self.tagname, str(shown))

      def __repr__(self):
          return self.shortrepr(maxlen=68)

      def _dom_node(self, domroot):
          """Return a DOM text node holding the raw text."""
          return domroot.createTextNode(str(self))

      def astext(self):
          """Return the text with null-escapes removed."""
          return str(unescape(self))

      def copy(self):
          """Return a new instance of the same class with the same raw text."""
          return self.__class__(str(self))

      def deepcopy(self):
          """Text nodes are leaves: a deep copy is the same as a copy."""
          return self.copy()

      def pformat(self, indent=' ', level=0):
          """Return the text as indented lines for pseudo-XML output.

          If the document settings have a true `detailed` value, return a
          ``<#text>`` tag followed by the `repr()` of every raw line instead.
          """
          try:
              if self.document.settings.detailed:
                  header = '%s%s' % (indent*level, '<#text>')
                  raw_lines = (indent*(level+1) + repr(raw)
                               for raw in self.splitlines(True))
                  return '\n'.join((header, *raw_lines)) + '\n'
          except AttributeError:
              # no document or no `detailed` setting: use the plain format
              pass
          prefix = indent * level
          text_lines = [prefix+row for row in self.astext().splitlines()]
          if text_lines:
              return '\n'.join(text_lines) + '\n'
          return ''

      # rstrip and lstrip are used by substitution definitions where
      # they are expected to return a Text instance, this was formerly
      # taken care of by UserString.

      def rstrip(self, chars=None):
          """Like `str.rstrip()` but return an instance of the node's class."""
          return self.__class__(str.rstrip(self, chars))

      def lstrip(self, chars=None):
          """Like `str.lstrip()` but return an instance of the node's class."""
          return self.__class__(str.lstrip(self, chars))
  368. class Element(Node):
  369. """
  370. `Element` is the superclass to all specific elements.
  371. Elements contain attributes and child nodes.
  372. They can be described as a cross between a list and a dictionary.
  373. Elements emulate dictionaries for external [#]_ attributes, indexing by
  374. attribute name (a string). To set the attribute 'att' to 'value', do::
  375. element['att'] = 'value'
  376. .. [#] External attributes correspond to the XML element attributes.
  377. From its `Node` superclass, Element also inherits "internal"
  378. class attributes that are accessed using the standard syntax, e.g.
  379. ``element.parent``.
  380. There are two special attributes: 'ids' and 'names'. Both are
  381. lists of unique identifiers: 'ids' conform to the regular expression
  382. ``[a-z](-?[a-z0-9]+)*`` (see the make_id() function for rationale and
  383. details). 'names' serve as user-friendly interfaces to IDs; they are
  384. case- and whitespace-normalized (see the fully_normalize_name() function).
  385. Elements emulate lists for child nodes (element nodes and/or text
  386. nodes), indexing by integer. To get the first child node, use::
  387. element[0]
  388. to iterate over the child nodes (without descending), use::
  389. for child in element:
  390. ...
  391. Elements may be constructed using the ``+=`` operator. To add one new
  392. child node to element, do::
  393. element += node
  394. This is equivalent to ``element.append(node)``.
  395. To add a list of multiple child nodes at once, use the same ``+=``
  396. operator::
  397. element += [node1, node2]
  398. This is equivalent to ``element.extend([node1, node2])``.
  399. """
  400. basic_attributes = ('ids', 'classes', 'names', 'dupnames')
  401. """Tuple of attributes which are defined for every Element-derived class
  402. instance and can be safely transferred to a different node."""
  403. local_attributes = ('backrefs',)
  404. """Tuple of class-specific attributes that should not be copied with the
  405. standard attributes when replacing a node.
  406. NOTE: Derived classes should override this value to prevent any of its
  407. attributes being copied by adding to the value in its parent class."""
  408. list_attributes = basic_attributes + local_attributes
  409. """Tuple of attributes that are automatically initialized to empty lists
  410. for all nodes."""
  411. known_attributes = list_attributes + ('source',)
  412. """Tuple of attributes that are known to the Element base class."""
  413. tagname = None
  414. """The element generic identifier. If None, it is set as an instance
  415. attribute to the name of the class."""
  416. child_text_separator = '\n\n'
  417. """Separator for child nodes, used by `astext()` method."""
  418. def __init__(self, rawsource='', *children, **attributes):
  419. self.rawsource = rawsource
  420. """The raw text from which this element was constructed.
  421. NOTE: some elements do not set this value (default '').
  422. """
  423. self.children = []
  424. """List of child nodes (elements and/or `Text`)."""
  425. self.extend(children) # maintain parent info
  426. self.attributes = {}
  427. """Dictionary of attribute {name: value}."""
  428. # Initialize list attributes.
  429. for att in self.list_attributes:
  430. self.attributes[att] = []
  431. for att, value in attributes.items():
  432. att = att.lower()
  433. if att in self.list_attributes:
  434. # mutable list; make a copy for this node
  435. self.attributes[att] = value[:]
  436. else:
  437. self.attributes[att] = value
  438. if self.tagname is None:
  439. self.tagname = self.__class__.__name__
  440. def _dom_node(self, domroot):
  441. element = domroot.createElement(self.tagname)
  442. for attribute, value in self.attlist():
  443. if isinstance(value, list):
  444. value = ' '.join(serial_escape('%s' % (v,)) for v in value)
  445. element.setAttribute(attribute, '%s' % value)
  446. for child in self.children:
  447. element.appendChild(child._dom_node(domroot))
  448. return element
  449. def __repr__(self):
  450. data = ''
  451. for c in self.children:
  452. data += c.shortrepr()
  453. if len(data) > 60:
  454. data = data[:56] + ' ...'
  455. break
  456. if self['names']:
  457. return '<%s "%s": %s>' % (self.__class__.__name__,
  458. '; '.join(self['names']), data)
  459. else:
  460. return '<%s: %s>' % (self.__class__.__name__, data)
  461. def shortrepr(self):
  462. if self['names']:
  463. return '<%s "%s"...>' % (self.__class__.__name__,
  464. '; '.join(self['names']))
  465. else:
  466. return '<%s...>' % self.tagname
  467. def __str__(self):
  468. if self.children:
  469. return '%s%s%s' % (self.starttag(),
  470. ''.join(str(c) for c in self.children),
  471. self.endtag())
  472. else:
  473. return self.emptytag()
  474. def starttag(self, quoteattr=None):
  475. # the optional arg is used by the docutils_xml writer
  476. if quoteattr is None:
  477. quoteattr = pseudo_quoteattr
  478. parts = [self.tagname]
  479. for name, value in self.attlist():
  480. if value is None: # boolean attribute
  481. parts.append('%s="True"' % name)
  482. continue
  483. if isinstance(value, list):
  484. values = [serial_escape('%s' % (v,)) for v in value]
  485. value = ' '.join(values)
  486. else:
  487. value = str(value)
  488. value = quoteattr(value)
  489. parts.append('%s=%s' % (name, value))
  490. return '<%s>' % ' '.join(parts)
  491. def endtag(self):
  492. return '</%s>' % self.tagname
  493. def emptytag(self):
  494. attributes = ('%s="%s"' % (n, v) for n, v in self.attlist())
  495. return '<%s/>' % ' '.join((self.tagname, *attributes))
  496. def __len__(self):
  497. return len(self.children)
  498. def __contains__(self, key):
  499. # Test for both, children and attributes with operator ``in``.
  500. if isinstance(key, str):
  501. return key in self.attributes
  502. return key in self.children
  503. def __getitem__(self, key):
  504. if isinstance(key, str):
  505. return self.attributes[key]
  506. elif isinstance(key, int):
  507. return self.children[key]
  508. elif isinstance(key, slice):
  509. assert key.step in (None, 1), 'cannot handle slice with stride'
  510. return self.children[key.start:key.stop]
  511. else:
  512. raise TypeError('element index must be an integer, a slice, or '
  513. 'an attribute name string')
  514. def __setitem__(self, key, item):
  515. if isinstance(key, str):
  516. self.attributes[str(key)] = item
  517. elif isinstance(key, int):
  518. self.setup_child(item)
  519. self.children[key] = item
  520. elif isinstance(key, slice):
  521. assert key.step in (None, 1), 'cannot handle slice with stride'
  522. for node in item:
  523. self.setup_child(node)
  524. self.children[key.start:key.stop] = item
  525. else:
  526. raise TypeError('element index must be an integer, a slice, or '
  527. 'an attribute name string')
  528. def __delitem__(self, key):
  529. if isinstance(key, str):
  530. del self.attributes[key]
  531. elif isinstance(key, int):
  532. del self.children[key]
  533. elif isinstance(key, slice):
  534. assert key.step in (None, 1), 'cannot handle slice with stride'
  535. del self.children[key.start:key.stop]
  536. else:
  537. raise TypeError('element index must be an integer, a simple '
  538. 'slice, or an attribute name string')
  539. def __add__(self, other):
  540. return self.children + other
  541. def __radd__(self, other):
  542. return other + self.children
  543. def __iadd__(self, other):
  544. """Append a node or a list of nodes to `self.children`."""
  545. if isinstance(other, Node):
  546. self.append(other)
  547. elif other is not None:
  548. self.extend(other)
  549. return self
  550. def astext(self):
  551. return self.child_text_separator.join(
  552. [child.astext() for child in self.children])
  553. def non_default_attributes(self):
  554. atts = {}
  555. for key, value in self.attributes.items():
  556. if self.is_not_default(key):
  557. atts[key] = value
  558. return atts
  559. def attlist(self):
  560. return sorted(self.non_default_attributes().items())
  561. def get(self, key, failobj=None):
  562. return self.attributes.get(key, failobj)
  563. def hasattr(self, attr):
  564. return attr in self.attributes
  565. def delattr(self, attr):
  566. if attr in self.attributes:
  567. del self.attributes[attr]
  568. def setdefault(self, key, failobj=None):
  569. return self.attributes.setdefault(key, failobj)
  570. has_key = hasattr
  571. def get_language_code(self, fallback=''):
  572. """Return node's language tag.
  573. Look iteratively in self and parents for a class argument
  574. starting with ``language-`` and return the remainder of it
  575. (which should be a `BCP49` language tag) or the `fallback`.
  576. """
  577. for cls in self.get('classes', []):
  578. if cls.startswith('language-'):
  579. return cls[9:]
  580. try:
  581. return self.parent.get_language(fallback)
  582. except AttributeError:
  583. return fallback
def append(self, item):
    """
    Add `item` as the last child.

    `setup_child()` is called on `item` before it is stored in
    `self.children`.
    """
    self.setup_child(item)
    self.children.append(item)
def extend(self, item):
    """Append every node of the iterable `item`, one by one, via `append()`."""
    for node in item:
        self.append(node)
  590. def insert(self, index, item):
  591. if isinstance(item, Node):
  592. self.setup_child(item)
  593. self.children.insert(index, item)
  594. elif item is not None:
  595. self[index:index] = item
def pop(self, i=-1):
    """Remove and return the child at index `i` (default: the last child)."""
    return self.children.pop(i)
def remove(self, item):
    """Remove child `item`; delegates to ``self.children.remove()``."""
    self.children.remove(item)
def index(self, item, start=0, stop=sys.maxsize):
    """
    Return the position of child `item`, searching `self.children`
    from `start` up to (not including) `stop`.
    """
    return self.children.index(item, start, stop)
  602. def is_not_default(self, key):
  603. if self[key] == [] and key in self.list_attributes:
  604. return 0
  605. else:
  606. return 1
  607. def update_basic_atts(self, dict_):
  608. """
  609. Update basic attributes ('ids', 'names', 'classes',
  610. 'dupnames', but not 'source') from node or dictionary `dict_`.
  611. """
  612. if isinstance(dict_, Node):
  613. dict_ = dict_.attributes
  614. for att in self.basic_attributes:
  615. self.append_attr_list(att, dict_.get(att, []))
  616. def append_attr_list(self, attr, values):
  617. """
  618. For each element in values, if it does not exist in self[attr], append
  619. it.
  620. NOTE: Requires self[attr] and values to be sequence type and the
  621. former should specifically be a list.
  622. """
  623. # List Concatenation
  624. for value in values:
  625. if value not in self[attr]:
  626. self[attr].append(value)
  627. def coerce_append_attr_list(self, attr, value):
  628. """
  629. First, convert both self[attr] and value to a non-string sequence
  630. type; if either is not already a sequence, convert it to a list of one
  631. element. Then call append_attr_list.
  632. NOTE: self[attr] and value both must not be None.
  633. """
  634. # List Concatenation
  635. if not isinstance(self.get(attr), list):
  636. self[attr] = [self[attr]]
  637. if not isinstance(value, list):
  638. value = [value]
  639. self.append_attr_list(attr, value)
  640. def replace_attr(self, attr, value, force=True):
  641. """
  642. If self[attr] does not exist or force is True or omitted, set
  643. self[attr] to value, otherwise do nothing.
  644. """
  645. # One or the other
  646. if force or self.get(attr) is None:
  647. self[attr] = value
  648. def copy_attr_convert(self, attr, value, replace=True):
  649. """
  650. If attr is an attribute of self, set self[attr] to
  651. [self[attr], value], otherwise set self[attr] to value.
  652. NOTE: replace is not used by this function and is kept only for
  653. compatibility with the other copy functions.
  654. """
  655. if self.get(attr) is not value:
  656. self.coerce_append_attr_list(attr, value)
  657. def copy_attr_coerce(self, attr, value, replace):
  658. """
  659. If attr is an attribute of self and either self[attr] or value is a
  660. list, convert all non-sequence values to a sequence of 1 element and
  661. then concatenate the two sequence, setting the result to self[attr].
  662. If both self[attr] and value are non-sequences and replace is True or
  663. self[attr] is None, replace self[attr] with value. Otherwise, do
  664. nothing.
  665. """
  666. if self.get(attr) is not value:
  667. if isinstance(self.get(attr), list) or \
  668. isinstance(value, list):
  669. self.coerce_append_attr_list(attr, value)
  670. else:
  671. self.replace_attr(attr, value, replace)
  672. def copy_attr_concatenate(self, attr, value, replace):
  673. """
  674. If attr is an attribute of self and both self[attr] and value are
  675. lists, concatenate the two sequences, setting the result to
  676. self[attr]. If either self[attr] or value are non-sequences and
  677. replace is True or self[attr] is None, replace self[attr] with value.
  678. Otherwise, do nothing.
  679. """
  680. if self.get(attr) is not value:
  681. if isinstance(self.get(attr), list) and \
  682. isinstance(value, list):
  683. self.append_attr_list(attr, value)
  684. else:
  685. self.replace_attr(attr, value, replace)
  686. def copy_attr_consistent(self, attr, value, replace):
  687. """
  688. If replace is True or self[attr] is None, replace self[attr] with
  689. value. Otherwise, do nothing.
  690. """
  691. if self.get(attr) is not value:
  692. self.replace_attr(attr, value, replace)
  693. def update_all_atts(self, dict_, update_fun=copy_attr_consistent,
  694. replace=True, and_source=False):
  695. """
  696. Updates all attributes from node or dictionary `dict_`.
  697. Appends the basic attributes ('ids', 'names', 'classes',
  698. 'dupnames', but not 'source') and then, for all other attributes in
  699. dict_, updates the same attribute in self. When attributes with the
  700. same identifier appear in both self and dict_, the two values are
  701. merged based on the value of update_fun. Generally, when replace is
  702. True, the values in self are replaced or merged with the values in
  703. dict_; otherwise, the values in self may be preserved or merged. When
  704. and_source is True, the 'source' attribute is included in the copy.
  705. NOTE: When replace is False, and self contains a 'source' attribute,
  706. 'source' is not replaced even when dict_ has a 'source'
  707. attribute, though it may still be merged into a list depending
  708. on the value of update_fun.
  709. NOTE: It is easier to call the update-specific methods then to pass
  710. the update_fun method to this function.
  711. """
  712. if isinstance(dict_, Node):
  713. dict_ = dict_.attributes
  714. # Include the source attribute when copying?
  715. if and_source:
  716. filter_fun = self.is_not_list_attribute
  717. else:
  718. filter_fun = self.is_not_known_attribute
  719. # Copy the basic attributes
  720. self.update_basic_atts(dict_)
  721. # Grab other attributes in dict_ not in self except the
  722. # (All basic attributes should be copied already)
  723. for att in filter(filter_fun, dict_):
  724. update_fun(self, att, dict_[att], replace)
def update_all_atts_consistantly(self, dict_, replace=True,
                                 and_source=False):
    """
    Updates all attributes from node or dictionary `dict_`.

    Appends the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') and then, for all other attributes in
    dict_, updates the same attribute in self.  When attributes with the
    same identifier appear in both self and dict_ and replace is True, the
    values in self are replaced with the values in dict_; otherwise, the
    values in self are preserved.  When and_source is True, the 'source'
    attribute is included in the copy.

    Delegates to `update_all_atts()` with `Element.copy_attr_consistent`
    as the update function.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute.
    """
    self.update_all_atts(dict_, Element.copy_attr_consistent, replace,
                         and_source)
def update_all_atts_concatenating(self, dict_, replace=True,
                                  and_source=False):
    """
    Updates all attributes from node or dictionary `dict_`.

    Appends the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') and then, for all other attributes in
    dict_, updates the same attribute in self.  When attributes with the
    same identifier appear in both self and dict_ whose values aren't each
    lists and replace is True, the values in self are replaced with the
    values in dict_; if the values from self and dict_ for the given
    identifier are both of list type, then the two lists are concatenated
    and the result stored in self; otherwise, the values in self are
    preserved.  When and_source is True, the 'source' attribute is
    included in the copy.

    Delegates to `update_all_atts()` with `Element.copy_attr_concatenate`
    as the update function.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute, though the two values are still merged when both
          of them are lists.
    """
    self.update_all_atts(dict_, Element.copy_attr_concatenate, replace,
                         and_source)
def update_all_atts_coercion(self, dict_, replace=True,
                             and_source=False):
    """
    Updates all attributes from node or dictionary `dict_`.

    Appends the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') and then, for all other attributes in
    dict_, updates the same attribute in self.  When attributes with the
    same identifier appear in both self and dict_ whose values are both
    not lists and replace is True, the values in self are replaced with
    the values in dict_; if either of the values from self and dict_ for
    the given identifier are of list type, then first any non-lists are
    converted to 1-element lists and then the two lists are concatenated
    and the result stored in self; otherwise, the values in self are
    preserved.  When and_source is True, the 'source' attribute is
    included in the copy.

    Delegates to `update_all_atts()` with `Element.copy_attr_coerce`
    as the update function.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute, though the two values are still merged into a list
          when either of them is a list.
    """
    self.update_all_atts(dict_, Element.copy_attr_coerce, replace,
                         and_source)
def update_all_atts_convert(self, dict_, and_source=False):
    """
    Updates all attributes from node or dictionary `dict_`.

    Appends the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') and then, for all other attributes in
    dict_, updates the same attribute in self.  When attributes with the
    same identifier appear in both self and dict_ then first any non-lists
    are converted to 1-element lists and then the two lists are
    concatenated and the result stored in self.  When and_source is
    True, the 'source' attribute is included in the copy.

    Delegates to `update_all_atts()` with `Element.copy_attr_convert`
    as the update function.

    NOTE: This method has no `replace` parameter; `copy_attr_convert()`
          ignores it and always merges the values into a list.
    """
    self.update_all_atts(dict_, Element.copy_attr_convert,
                         and_source=and_source)
def clear(self):
    """
    Drop all children by rebinding `self.children` to a new empty list.

    The attributes are left untouched.
    """
    self.children = []
  806. def replace(self, old, new):
  807. """Replace one child `Node` with another child or children."""
  808. index = self.index(old)
  809. if isinstance(new, Node):
  810. self.setup_child(new)
  811. self[index] = new
  812. elif new is not None:
  813. self[index:index+1] = new
  814. def replace_self(self, new):
  815. """
  816. Replace `self` node with `new`, where `new` is a node or a
  817. list of nodes.
  818. """
  819. update = new
  820. if not isinstance(new, Node):
  821. # `new` is a list; update first child.
  822. try:
  823. update = new[0]
  824. except IndexError:
  825. update = None
  826. if isinstance(update, Element):
  827. update.update_basic_atts(self)
  828. else:
  829. # `update` is a Text node or `new` is an empty list.
  830. # Assert that we aren't losing any attributes.
  831. for att in self.basic_attributes:
  832. assert not self[att], \
  833. 'Losing "%s" attribute: %s' % (att, self[att])
  834. self.parent.replace(self, new)
  835. def first_child_matching_class(self, childclass, start=0, end=sys.maxsize):
  836. """
  837. Return the index of the first child whose class exactly matches.
  838. Parameters:
  839. - `childclass`: A `Node` subclass to search for, or a tuple of `Node`
  840. classes. If a tuple, any of the classes may match.
  841. - `start`: Initial index to check.
  842. - `end`: Initial index to *not* check.
  843. """
  844. if not isinstance(childclass, tuple):
  845. childclass = (childclass,)
  846. for index in range(start, min(len(self), end)):
  847. for c in childclass:
  848. if isinstance(self[index], c):
  849. return index
  850. return None
  851. def first_child_not_matching_class(self, childclass, start=0,
  852. end=sys.maxsize):
  853. """
  854. Return the index of the first child whose class does *not* match.
  855. Parameters:
  856. - `childclass`: A `Node` subclass to skip, or a tuple of `Node`
  857. classes. If a tuple, none of the classes may match.
  858. - `start`: Initial index to check.
  859. - `end`: Initial index to *not* check.
  860. """
  861. if not isinstance(childclass, tuple):
  862. childclass = (childclass,)
  863. for index in range(start, min(len(self), end)):
  864. for c in childclass:
  865. if isinstance(self.children[index], c):
  866. break
  867. else:
  868. return index
  869. return None
  870. def pformat(self, indent=' ', level=0):
  871. tagline = '%s%s\n' % (indent*level, self.starttag())
  872. childreps = (c.pformat(indent, level+1) for c in self.children)
  873. return ''.join((tagline, *childreps))
def copy(self):
    """
    Return a new instance of the same class, created from this
    element's `rawsource` and attributes and carrying the same
    `_document`, `source` and `line`.

    No children are passed to the new instance; use `deepcopy()` to
    duplicate the subtree as well.
    """
    obj = self.__class__(rawsource=self.rawsource, **self.attributes)
    obj._document = self._document
    obj.source = self.source
    obj.line = self.line
    return obj
  880. def deepcopy(self):
  881. copy = self.copy()
  882. copy.extend([child.deepcopy() for child in self.children])
  883. return copy
  884. def set_class(self, name):
  885. """Add a new class to the "classes" attribute."""
  886. warnings.warn('docutils.nodes.Element.set_class() is deprecated; '
  887. ' and will be removed in Docutils 0.21 or later.',
  888. "Append to Element['classes'] list attribute directly",
  889. DeprecationWarning, stacklevel=2)
  890. assert ' ' not in name
  891. self['classes'].append(name.lower())
  892. def note_referenced_by(self, name=None, id=None):
  893. """Note that this Element has been referenced by its name
  894. `name` or id `id`."""
  895. self.referenced = 1
  896. # Element.expect_referenced_by_* dictionaries map names or ids
  897. # to nodes whose ``referenced`` attribute is set to true as
  898. # soon as this node is referenced by the given name or id.
  899. # Needed for target propagation.
  900. by_name = getattr(self, 'expect_referenced_by_name', {}).get(name)
  901. by_id = getattr(self, 'expect_referenced_by_id', {}).get(id)
  902. if by_name:
  903. assert name is not None
  904. by_name.referenced = 1
  905. if by_id:
  906. assert id is not None
  907. by_id.referenced = 1
@classmethod
def is_not_list_attribute(cls, attr):
    """
    Returns True if and only if the given attribute is NOT one of the
    basic list attributes defined for all Elements
    (i.e. `attr` is not in `cls.list_attributes`).
    """
    return attr not in cls.list_attributes
@classmethod
def is_not_known_attribute(cls, attr):
    """
    Returns True if and only if the given attribute is NOT recognized by
    this class (i.e. `attr` is not in `cls.known_attributes`).
    """
    return attr not in cls.known_attributes
  922. class TextElement(Element):
  923. """
  924. An element which directly contains text.
  925. Its children are all `Text` or `Inline` subclass nodes. You can
  926. check whether an element's context is inline simply by checking whether
  927. its immediate parent is a `TextElement` instance (including subclasses).
  928. This is handy for nodes like `image` that can appear both inline and as
  929. standalone body elements.
  930. If passing children to `__init__()`, make sure to set `text` to
  931. ``''`` or some other suitable value.
  932. """
  933. child_text_separator = ''
  934. """Separator for child nodes, used by `astext()` method."""
  935. def __init__(self, rawsource='', text='', *children, **attributes):
  936. if text != '':
  937. textnode = Text(text)
  938. Element.__init__(self, rawsource, textnode, *children,
  939. **attributes)
  940. else:
  941. Element.__init__(self, rawsource, *children, **attributes)
class FixedTextElement(TextElement):

    """An element which directly contains preformatted text."""

    def __init__(self, rawsource='', text='', *children, **attributes):
        """
        Initialize like `TextElement`, then set the ``xml:space``
        attribute to ``'preserve'`` (overriding any value passed in
        `attributes`).
        """
        TextElement.__init__(self, rawsource, text, *children, **attributes)
        self.attributes['xml:space'] = 'preserve'
  947. # ========
  948. # Mixins
  949. # ========
# Mixin providing a class-level default of 0 for the `resolved` flag;
# base class of `Referential` and `Targetable`.
class Resolvable:

    resolved = 0
class BackLinkable:

    def add_backref(self, refid):
        """Append `refid` to this node's ``'backrefs'`` list attribute."""
        self['backrefs'].append(refid)
  955. # ====================
  956. # Element Categories
  957. # ====================
# The classes below carry no behaviour of their own; they are mixed into
# element classes as category markers that can be tested with
# ``isinstance()``.

# Mixed into `document`.
class Root:
    pass


# Mixed into `title`, `subtitle` and `rubric`.
class Titular:
    pass


class PreBibliographic:
    """Category of Node which may occur before Bibliographic Nodes."""


# Mixed into `docinfo` and its field elements (`author`, `date`, ...).
class Bibliographic:
    pass


# Mixed into `decoration`, `header` and `footer`.
class Decorative(PreBibliographic):
    pass


# Mixed into `document`, `section`, `topic`, `sidebar` and `transition`.
class Structural:
    pass


# Common base of the body element categories that follow.
class Body:
    pass


# Mixed into `paragraph`, `compound` and `container`.
class General(Body):
    pass


class Sequential(Body):
    """List-like elements."""


class Admonition(Body): pass


class Special(Body):
    """Special internal body elements."""


class Invisible(PreBibliographic):
    """Internal elements that don't appear in output."""


# Mixed into sub-elements such as `list_item`, `term` and `field`.
class Part:
    pass


class Inline:
    pass


class Referential(Resolvable):
    pass


class Targetable(Resolvable):

    # Class-level default; `Element.note_referenced_by()` sets the
    # instance attribute to 1.
    referenced = 0

    indirect_reference_name = None
    """Holds the whitespace_normalized_name (contains mixed case) of a target.
    Required for MoinMoin/reST compatibility."""


class Labeled:
    """Contains a `label` as its first element."""
  994. # ==============
  995. # Root Element
  996. # ==============
class document(Root, Structural, Element):

    """
    The document root element.

    Do not instantiate this class directly; use
    `docutils.utils.new_document()` instead.
    """

    def __init__(self, settings, reporter, *args, **kwargs):
        """
        Initialize the root element and its bookkeeping tables.

        `settings` and `reporter` are stored on the instance; the
        remaining arguments are passed on to `Element.__init__()`.
        """
        Element.__init__(self, *args, **kwargs)

        self.current_source = None
        """Path to or description of the input source being processed."""

        self.current_line = None
        """Line number (1-based) of `current_source`."""

        self.settings = settings
        """Runtime settings data record."""

        self.reporter = reporter
        """System message generator."""

        self.indirect_targets = []
        """List of indirect target nodes."""

        self.substitution_defs = {}
        """Mapping of substitution names to substitution_definition nodes."""

        self.substitution_names = {}
        """Mapping of case-normalized substitution names to case-sensitive
        names."""

        self.refnames = {}
        """Mapping of names to lists of referencing nodes."""

        self.refids = {}
        """Mapping of ids to lists of referencing nodes."""

        self.nameids = {}
        """Mapping of names to unique id's."""

        self.nametypes = {}
        """Mapping of names to hyperlink type (boolean: True => explicit,
        False => implicit."""

        self.ids = {}
        """Mapping of ids to nodes."""

        self.footnote_refs = {}
        """Mapping of footnote labels to lists of footnote_reference nodes."""

        self.citation_refs = {}
        """Mapping of citation labels to lists of citation_reference nodes."""

        self.autofootnotes = []
        """List of auto-numbered footnote nodes."""

        self.autofootnote_refs = []
        """List of auto-numbered footnote_reference nodes."""

        self.symbol_footnotes = []
        """List of symbol footnote nodes."""

        self.symbol_footnote_refs = []
        """List of symbol footnote_reference nodes."""

        self.footnotes = []
        """List of manually-numbered footnote nodes."""

        self.citations = []
        """List of citation nodes."""

        self.autofootnote_start = 1
        """Initial auto-numbered footnote number."""

        self.symbol_footnote_start = 0
        """Initial symbol footnote symbol index."""

        self.id_counter = Counter()
        """Numbers added to otherwise identical IDs."""

        self.parse_messages = []
        """System messages generated while parsing."""

        self.transform_messages = []
        """System messages generated while applying transforms."""

        # Imported here rather than at module level
        # (presumably to avoid a circular import -- TODO confirm).
        import docutils.transforms
        self.transformer = docutils.transforms.Transformer(self)
        """Storage for transforms to be applied to this document."""

        self.include_log = []
        """The current source's parents (to detect inclusion loops)."""

        self.decoration = None
        """Document's `decoration` node."""

        # The root element is its own document.
        self._document = self

    def __getstate__(self):
        """
        Return dict with unpicklable references removed.

        The returned copy of the instance dictionary has `reporter` and
        `transformer` set to None; the instance itself is not changed.
        """
        state = self.__dict__.copy()
        state['reporter'] = None
        state['transformer'] = None
        return state

    def asdom(self, dom=None):
        """Return a DOM representation of this document.

        `dom` is the DOM implementation module to use; it defaults to
        `xml.dom.minidom`.
        """
        if dom is None:
            import xml.dom.minidom as dom
        domroot = dom.Document()
        domroot.appendChild(self._dom_node(domroot))
        return domroot

    def set_id(self, node, msgnode=None, suggested_prefix=''):
        """
        Register the IDs of `node` in `self.ids`, generating one if the
        node has none, and return an ID of the node.

        If ``node['ids']`` is not empty, every ID is registered; for an
        ID already taken by another node a "severe" system message is
        generated (and added to `msgnode` if given).  The last ID in
        ``node['ids']`` is returned.

        Otherwise an ID is derived from the node's names (prefixed with
        the `id_prefix` setting) or, if that yields no unused ID,
        generated from a prefix and a running number.  The new ID is
        appended to ``node['ids']``, registered and returned.
        """
        if node['ids']:
            # register and check for duplicates
            for id in node['ids']:
                self.ids.setdefault(id, node)
                if self.ids[id] is not node:
                    msg = self.reporter.severe('Duplicate ID: "%s".' % id)
                    if msgnode is not None:
                        msgnode += msg
            return id
        # generate and set id
        id_prefix = self.settings.id_prefix
        auto_id_prefix = self.settings.auto_id_prefix
        base_id = ''
        id = ''
        for name in node['names']:
            if id_prefix:
                # allow names starting with numbers if `id_prefix`
                base_id = make_id('x'+name)[1:]
            else:
                base_id = make_id(name)
            # TODO: normalize id-prefix? (would make code simpler)
            id = id_prefix + base_id
            if base_id and id not in self.ids:
                break
        else:
            # No name produced a usable, unused ID: build a numbered one.
            if base_id and auto_id_prefix.endswith('%'):
                # disambiguate name-derived ID
                # TODO: remove second condition after announcing change
                prefix = id + '-'
            else:
                prefix = id_prefix + auto_id_prefix
                if prefix.endswith('%'):
                    # A trailing "%" is replaced by `suggested_prefix`
                    # or, failing that, the ID form of the tag name.
                    prefix = '%s%s-' % (prefix[:-1],
                                        suggested_prefix
                                        or make_id(node.tagname))
            while True:
                # Count up until the candidate ID is unused.
                self.id_counter[prefix] += 1
                id = '%s%d' % (prefix, self.id_counter[prefix])
                if id not in self.ids:
                    break
        node['ids'].append(id)
        self.ids[id] = node
        return id

    def set_name_id_map(self, node, id, msgnode=None, explicit=None):
        """
        `self.nameids` maps names to IDs, while `self.nametypes` maps names to
        booleans representing hyperlink type (True==explicit,
        False==implicit).  This method updates the mappings.

        The following state transition table shows how `self.nameids` items
        ("id") and `self.nametypes` items ("type") change with new input
        (a call to this method), and what actions are performed
        ("implicit"-type system messages are INFO/1, and
        "explicit"-type system messages are ERROR/3):

        ====  =====  ========  ========  =======  ====  =====  =====
         Old State    Input          Action        New State   Notes
        -----------  --------  -----------------  -----------  -----
        id    type   new type  sys.msg.  dupname  id    type
        ====  =====  ========  ========  =======  ====  =====  =====
        -     -      explicit  -         -        new   True
        -     -      implicit  -         -        new   False
        -     False  explicit  -         -        new   True
        old   False  explicit  implicit  old      new   True
        -     True   explicit  explicit  new      -     True
        old   True   explicit  explicit  new,old  -     True   [#]_
        -     False  implicit  implicit  new      -     False
        old   False  implicit  implicit  new,old  -     False
        -     True   implicit  implicit  new      -     True
        old   True   implicit  implicit  new      old   True
        ====  =====  ========  ========  =======  ====  =====  =====

        .. [#] Do not clear the name-to-id map or invalidate the old target if
           both old and new targets are external and refer to identical URIs.
           The new target is invalidated regardless.
        """
        # Iterate over a snapshot: `set_duplicate_name_id()` may change
        # node['names'] during the loop.
        for name in tuple(node['names']):
            if name in self.nameids:
                self.set_duplicate_name_id(node, id, name, msgnode, explicit)
                # attention: modifies node['names']
            else:
                self.nameids[name] = id
                self.nametypes[name] = explicit

    def set_duplicate_name_id(self, node, id, name, msgnode, explicit):
        """
        Handle a `name` of `node` that is already registered in
        `self.nameids`.

        Updates `self.nameids` and `self.nametypes`, calls `dupname()`
        on the affected node(s) and generates a system message (added to
        `msgnode` if given); see the state transition table in
        `set_name_id_map()`.
        """
        old_id = self.nameids[name]
        old_explicit = self.nametypes[name]
        # A name stays "explicit" once any of its targets was explicit.
        self.nametypes[name] = old_explicit or explicit
        if explicit:
            if old_explicit:
                # explicit vs. explicit
                level = 2
                if old_id is not None:
                    old_node = self.ids[old_id]
                    if 'refuri' in node:
                        refuri = node['refuri']
                        if (old_node['names']
                            and 'refuri' in old_node
                            and old_node['refuri'] == refuri):
                            level = 1   # just inform if refuri's identical
                    if level > 1:
                        dupname(old_node, name)
                        self.nameids[name] = None
                msg = self.reporter.system_message(
                    level, 'Duplicate explicit target name: "%s".' % name,
                    backrefs=[id], base_node=node)
                if msgnode is not None:
                    msgnode += msg
                dupname(node, name)
            else:
                # new explicit target takes over from the implicit one
                self.nameids[name] = id
                if old_id is not None:
                    old_node = self.ids[old_id]
                    dupname(old_node, name)
        else:
            # new implicit target
            if old_id is not None and not old_explicit:
                self.nameids[name] = None
                old_node = self.ids[old_id]
                dupname(old_node, name)
            dupname(node, name)
        if not explicit or (not old_explicit and old_id is not None):
            msg = self.reporter.info(
                'Duplicate implicit target name: "%s".' % name,
                backrefs=[id], base_node=node)
            if msgnode is not None:
                msgnode += msg

    def has_name(self, name):
        """Return True if `name` is a key of `self.nameids`."""
        return name in self.nameids

    # "note" here is an imperative verb: "take note of".
    def note_implicit_target(self, target, msgnode=None):
        """Set the ID of `target` and register its names as implicit."""
        id = self.set_id(target, msgnode)
        self.set_name_id_map(target, id, msgnode, explicit=False)

    def note_explicit_target(self, target, msgnode=None):
        """Set the ID of `target` and register its names as explicit."""
        id = self.set_id(target, msgnode)
        self.set_name_id_map(target, id, msgnode, explicit=True)

    def note_refname(self, node):
        """Add `node` to the `self.refnames` list for ``node['refname']``."""
        self.refnames.setdefault(node['refname'], []).append(node)

    def note_refid(self, node):
        """Add `node` to the `self.refids` list for ``node['refid']``."""
        self.refids.setdefault(node['refid'], []).append(node)

    def note_indirect_target(self, target):
        """
        Add `target` to `self.indirect_targets`; if it has names, also
        register it with `note_refname()`.
        """
        self.indirect_targets.append(target)
        if target['names']:
            self.note_refname(target)

    def note_anonymous_target(self, target):
        """Set the ID of `target` (nothing else is recorded)."""
        self.set_id(target)

    def note_autofootnote(self, footnote):
        """Set the ID of `footnote` and add it to `self.autofootnotes`."""
        self.set_id(footnote)
        self.autofootnotes.append(footnote)

    def note_autofootnote_ref(self, ref):
        """Set the ID of `ref` and add it to `self.autofootnote_refs`."""
        self.set_id(ref)
        self.autofootnote_refs.append(ref)

    def note_symbol_footnote(self, footnote):
        """Set the ID of `footnote` and add it to `self.symbol_footnotes`."""
        self.set_id(footnote)
        self.symbol_footnotes.append(footnote)

    def note_symbol_footnote_ref(self, ref):
        """Set the ID of `ref` and add it to `self.symbol_footnote_refs`."""
        self.set_id(ref)
        self.symbol_footnote_refs.append(ref)

    def note_footnote(self, footnote):
        """Set the ID of `footnote` and add it to `self.footnotes`."""
        self.set_id(footnote)
        self.footnotes.append(footnote)

    def note_footnote_ref(self, ref):
        """
        Set the ID of `ref` and record it under ``ref['refname']`` in
        both `self.footnote_refs` and `self.refnames`.
        """
        self.set_id(ref)
        self.footnote_refs.setdefault(ref['refname'], []).append(ref)
        self.note_refname(ref)

    def note_citation(self, citation):
        """Add `citation` to `self.citations` (no ID is set here)."""
        self.citations.append(citation)

    def note_citation_ref(self, ref):
        """
        Set the ID of `ref` and record it under ``ref['refname']`` in
        both `self.citation_refs` and `self.refnames`.
        """
        self.set_id(ref)
        self.citation_refs.setdefault(ref['refname'], []).append(ref)
        self.note_refname(ref)

    def note_substitution_def(self, subdef, def_name, msgnode=None):
        """
        Register substitution definition `subdef` under the
        whitespace-normalized form of `def_name`.

        If the name is already defined, an error system message is
        generated (added to `msgnode` if given), `dupname()` is called
        on the old definition, and the new definition replaces it.
        """
        name = whitespace_normalize_name(def_name)
        if name in self.substitution_defs:
            msg = self.reporter.error(
                  'Duplicate substitution definition name: "%s".' % name,
                  base_node=subdef)
            if msgnode is not None:
                msgnode += msg
            oldnode = self.substitution_defs[name]
            dupname(oldnode, name)
        # keep only the last definition:
        self.substitution_defs[name] = subdef
        # case-insensitive mapping:
        self.substitution_names[fully_normalize_name(name)] = name

    def note_substitution_ref(self, subref, refname):
        """Store the whitespace-normalized `refname` in ``subref['refname']``."""
        subref['refname'] = whitespace_normalize_name(refname)

    def note_pending(self, pending, priority=None):
        """Pass `pending` and `priority` to ``self.transformer.add_pending()``."""
        self.transformer.add_pending(pending, priority)

    def note_parse_message(self, message):
        """Add `message` to `self.parse_messages`."""
        self.parse_messages.append(message)

    def note_transform_message(self, message):
        """Add `message` to `self.transform_messages`."""
        self.transform_messages.append(message)

    def note_source(self, source, offset):
        """
        Record the current input position.

        `offset` is converted to the 1-based `self.current_line` by
        adding 1; an `offset` of None is stored unchanged.
        """
        self.current_source = source
        if offset is None:
            self.current_line = offset
        else:
            self.current_line = offset + 1

    def copy(self):
        """
        Return a new instance of the same class, created with this
        document's settings, reporter and attributes and carrying the
        same `source` and `line`.  No children are passed on.
        """
        obj = self.__class__(self.settings, self.reporter,
                             **self.attributes)
        obj.source = self.source
        obj.line = self.line
        return obj

    def get_decoration(self):
        """
        Return the document's `decoration` element, creating it on
        first use.

        A new decoration element is inserted before the first child
        that is neither `Titular` nor `meta`, or appended if there is
        no such child.
        """
        if not self.decoration:
            self.decoration = decoration()
            index = self.first_child_not_matching_class((Titular, meta))
            if index is None:
                self.append(self.decoration)
            else:
                self.insert(index, self.decoration)
        return self.decoration
  1289. # ================
  1290. # Title Elements
  1291. # ================
# Text-bearing `Titular` elements.  `title` and `subtitle` are also
# `PreBibliographic`; `rubric` is not.
class title(Titular, PreBibliographic, TextElement): pass
class subtitle(Titular, PreBibliographic, TextElement): pass
class rubric(Titular, TextElement): pass
  1295. # ==================
  1296. # Meta-Data Element
  1297. # ==================
# `document.get_decoration()` skips leading `meta` (and `Titular`)
# children when placing the decoration element.
class meta(PreBibliographic, Element):
    """Container for "invisible" bibliographic data, or meta-data."""
  1300. # ========================
  1301. # Bibliographic Elements
  1302. # ========================
# `docinfo` and `authors` are plain `Element` containers; the other
# bibliographic elements hold text directly (`TextElement`), with
# `address` preserving whitespace (`FixedTextElement`).
class docinfo(Bibliographic, Element): pass
class author(Bibliographic, TextElement): pass
class authors(Bibliographic, Element): pass
class organization(Bibliographic, TextElement): pass
class address(Bibliographic, FixedTextElement): pass
class contact(Bibliographic, TextElement): pass
class version(Bibliographic, TextElement): pass
class revision(Bibliographic, TextElement): pass
class status(Bibliographic, TextElement): pass
class date(Bibliographic, TextElement): pass
class copyright(Bibliographic, TextElement): pass
  1314. # =====================
  1315. # Decorative Elements
  1316. # =====================
  1317. class decoration(Decorative, Element):
  1318. def get_header(self):
  1319. if not len(self.children) or not isinstance(self.children[0], header):
  1320. self.insert(0, header())
  1321. return self.children[0]
  1322. def get_footer(self):
  1323. if not len(self.children) or not isinstance(self.children[-1], footer):
  1324. self.append(footer())
  1325. return self.children[-1]
# Created on demand by `decoration.get_header()` / `get_footer()`.
class header(Decorative, Element): pass
class footer(Decorative, Element): pass
  1328. # =====================
  1329. # Structural Elements
  1330. # =====================
  1331. class section(Structural, Element): pass
class topic(Structural, Element):
    """
    Topics are terminal, "leaf" mini-sections, like block quotes with titles,
    or textual figures. A topic is just like a section, except that it has no
    subsections, and it doesn't have to conform to section placement rules.

    Topics are allowed wherever body elements (list, table, etc.) are allowed,
    but only at the top level of a section or document. Topics cannot nest
    inside topics, sidebars, or body elements; you can't have a topic inside a
    table, list, block quote, etc.
    """
class sidebar(Structural, Element):
    """
    Sidebars are like miniature, parallel documents that occur inside other
    documents, providing related or reference material. A sidebar is
    typically offset by a border and "floats" to the side of the page; the
    document's main text may flow around it. Sidebars can also be likened to
    super-footnotes; their content is outside of the flow of the document's
    main text.

    Sidebars are allowed wherever body elements (list, table, etc.) are
    allowed, but only at the top level of a section or document. Sidebars
    cannot nest inside sidebars, topics, or body elements; you can't have a
    sidebar inside a table, list, block quote, etc.
    """
# Structural element; declares no attributes or methods of its own.
class transition(Structural, Element): pass
  1356. # ===============
  1357. # Body Elements
  1358. # ===============
# Body elements declared purely through their base classes: none of the
# classes from `paragraph` through `field_body` adds attributes or methods.
class paragraph(General, TextElement): pass
class compound(General, Element): pass
class container(General, Element): pass
class bullet_list(Sequential, Element): pass
class enumerated_list(Sequential, Element): pass
class list_item(Part, Element): pass
class definition_list(Sequential, Element): pass
class definition_list_item(Part, Element): pass
class term(Part, TextElement): pass
class classifier(Part, TextElement): pass
class definition(Part, Element): pass
class field_list(Sequential, Element): pass
class field(Part, Element): pass
class field_name(Part, TextElement): pass
class field_body(Part, Element): pass


class option(Part, Element):

    # Empty-string separator; presumably the string placed between the text
    # of child nodes by `Element.astext()` -- TODO confirm.
    child_text_separator = ''
  1376. class option_argument(Part, TextElement):
  1377. def astext(self):
  1378. return self.get('delimiter', ' ') + TextElement.astext(self)
class option_group(Part, Element):

    # Class-level override of the separator string (comma + space).
    child_text_separator = ', '


class option_list(Sequential, Element): pass


class option_list_item(Part, Element):

    # Class-level override of the separator string.
    child_text_separator = ' '


# Declared purely through their base classes; no attributes or methods added.
class option_string(Part, TextElement): pass
class description(Part, Element): pass
class literal_block(General, FixedTextElement): pass
class doctest_block(General, FixedTextElement): pass
class math_block(General, FixedTextElement): pass
class line_block(General, Element): pass


class line(Part, TextElement):

    # Class-level default; instances see None until `indent` is assigned.
    indent = None


class block_quote(General, Element): pass
class attribution(Part, TextElement): pass
# Admonition elements: each class combines the `Admonition` category with
# `Element` and adds no attributes or methods of its own.
class attention(Admonition, Element): pass
class caution(Admonition, Element): pass
class danger(Admonition, Element): pass
class error(Admonition, Element): pass
class important(Admonition, Element): pass
class note(Admonition, Element): pass
class tip(Admonition, Element): pass
class hint(Admonition, Element): pass
class warning(Admonition, Element): pass
class admonition(Admonition, Element): pass
# Declared purely through their base classes; no attributes or methods added.
class comment(Special, Invisible, FixedTextElement): pass
class substitution_definition(Special, Invisible, TextElement): pass
class target(Special, Invisible, Inline, TextElement, Targetable): pass
class footnote(General, BackLinkable, Element, Labeled, Targetable): pass
class citation(General, BackLinkable, Element, Labeled, Targetable): pass
class label(Part, TextElement): pass
class figure(General, Element): pass
class caption(Part, TextElement): pass
class legend(Part, Element): pass
class table(General, Element): pass
class tgroup(Part, Element): pass
class colspec(Part, Element): pass
class thead(Part, Element): pass
class tbody(Part, Element): pass
class row(Part, Element): pass
class entry(Part, Element): pass
  1420. class system_message(Special, BackLinkable, PreBibliographic, Element):
  1421. """
  1422. System message element.
  1423. Do not instantiate this class directly; use
  1424. ``document.reporter.info/warning/error/severe()`` instead.
  1425. """
  1426. def __init__(self, message=None, *children, **attributes):
  1427. rawsource = attributes.pop('rawsource', '')
  1428. if message:
  1429. p = paragraph('', message)
  1430. children = (p,) + children
  1431. try:
  1432. Element.__init__(self, rawsource, *children, **attributes)
  1433. except: # noqa catchall
  1434. print('system_message: children=%r' % (children,))
  1435. raise
  1436. def astext(self):
  1437. line = self.get('line', '')
  1438. return '%s:%s: (%s/%s) %s' % (self['source'], line, self['type'],
  1439. self['level'], Element.astext(self))
  1440. class pending(Special, Invisible, Element):
  1441. """
  1442. The "pending" element is used to encapsulate a pending operation: the
  1443. operation (transform), the point at which to apply it, and any data it
  1444. requires. Only the pending operation's location within the document is
  1445. stored in the public document tree (by the "pending" object itself); the
  1446. operation and its data are stored in the "pending" object's internal
  1447. instance attributes.
  1448. For example, say you want a table of contents in your reStructuredText
  1449. document. The easiest way to specify where to put it is from within the
  1450. document, with a directive::
  1451. .. contents::
  1452. But the "contents" directive can't do its work until the entire document
  1453. has been parsed and possibly transformed to some extent. So the directive
  1454. code leaves a placeholder behind that will trigger the second phase of its
  1455. processing, something like this::
  1456. <pending ...public attributes...> + internal attributes
  1457. Use `document.note_pending()` so that the
  1458. `docutils.transforms.Transformer` stage of processing can run all pending
  1459. transforms.
  1460. """
  1461. def __init__(self, transform, details=None,
  1462. rawsource='', *children, **attributes):
  1463. Element.__init__(self, rawsource, *children, **attributes)
  1464. self.transform = transform
  1465. """The `docutils.transforms.Transform` class implementing the pending
  1466. operation."""
  1467. self.details = details or {}
  1468. """Detail data (dictionary) required by the pending operation."""
  1469. def pformat(self, indent=' ', level=0):
  1470. internals = ['.. internal attributes:',
  1471. ' .transform: %s.%s' % (self.transform.__module__,
  1472. self.transform.__name__),
  1473. ' .details:']
  1474. details = sorted(self.details.items())
  1475. for key, value in details:
  1476. if isinstance(value, Node):
  1477. internals.append('%7s%s:' % ('', key))
  1478. internals.extend(['%9s%s' % ('', line)
  1479. for line in value.pformat().splitlines()])
  1480. elif (value
  1481. and isinstance(value, list)
  1482. and isinstance(value[0], Node)):
  1483. internals.append('%7s%s:' % ('', key))
  1484. for v in value:
  1485. internals.extend(['%9s%s' % ('', line)
  1486. for line in v.pformat().splitlines()])
  1487. else:
  1488. internals.append('%7s%s: %r' % ('', key, value))
  1489. return (Element.pformat(self, indent, level)
  1490. + ''.join((' %s%s\n' % (indent * level, line))
  1491. for line in internals))
  1492. def copy(self):
  1493. obj = self.__class__(self.transform, self.details, self.rawsource,
  1494. **self.attributes)
  1495. obj._document = self._document
  1496. obj.source = self.source
  1497. obj.line = self.line
  1498. return obj
class raw(Special, Inline, PreBibliographic, FixedTextElement):
    """
    Raw data that is to be passed untouched to the Writer.
    """
  1503. # =================
  1504. # Inline Elements
  1505. # =================
# Inline elements declared purely through their base classes; none adds
# attributes or methods of its own.
class emphasis(Inline, TextElement): pass
class strong(Inline, TextElement): pass
class literal(Inline, TextElement): pass
class reference(General, Inline, Referential, TextElement): pass
class footnote_reference(Inline, Referential, TextElement): pass
class citation_reference(Inline, Referential, TextElement): pass
class substitution_reference(Inline, TextElement): pass
class title_reference(Inline, TextElement): pass
class abbreviation(Inline, TextElement): pass
class acronym(Inline, TextElement): pass
class superscript(Inline, TextElement): pass
class subscript(Inline, TextElement): pass
class math(Inline, TextElement): pass
  1519. class image(General, Inline, Element):
  1520. def astext(self):
  1521. return self.get('alt', '')
# Inline text elements; no attributes or methods of their own.
class inline(Inline, TextElement): pass
class problematic(Inline, TextElement): pass
class generated(Inline, TextElement): pass
  1525. # ========================================
  1526. # Auxiliary Classes, Functions, and Data
  1527. # ========================================
  1528. node_class_names = """
  1529. Text
  1530. abbreviation acronym address admonition attention attribution author
  1531. authors
  1532. block_quote bullet_list
  1533. caption caution citation citation_reference classifier colspec comment
  1534. compound contact container copyright
  1535. danger date decoration definition definition_list definition_list_item
  1536. description docinfo doctest_block document
  1537. emphasis entry enumerated_list error
  1538. field field_body field_list field_name figure footer
  1539. footnote footnote_reference
  1540. generated
  1541. header hint
  1542. image important inline
  1543. label legend line line_block list_item literal literal_block
  1544. math math_block meta
  1545. note
  1546. option option_argument option_group option_list option_list_item
  1547. option_string organization
  1548. paragraph pending problematic
  1549. raw reference revision row rubric
  1550. section sidebar status strong subscript substitution_definition
  1551. substitution_reference subtitle superscript system_message
  1552. table target tbody term tgroup thead tip title title_reference topic
  1553. transition
  1554. version
  1555. warning""".split()
  1556. """A list of names of all concrete Node subclasses."""
  1557. class NodeVisitor:
  1558. """
  1559. "Visitor" pattern [GoF95]_ abstract superclass implementation for
  1560. document tree traversals.
  1561. Each node class has corresponding methods, doing nothing by
  1562. default; override individual methods for specific and useful
  1563. behaviour. The `dispatch_visit()` method is called by
  1564. `Node.walk()` upon entering a node. `Node.walkabout()` also calls
  1565. the `dispatch_departure()` method before exiting a node.
  1566. The dispatch methods call "``visit_`` + node class name" or
  1567. "``depart_`` + node class name", resp.
  1568. This is a base class for visitors whose ``visit_...`` & ``depart_...``
  1569. methods must be implemented for *all* compulsory node types encountered
  1570. (such as for `docutils.writers.Writer` subclasses).
  1571. Unimplemented methods will raise exceptions (except for optional nodes).
  1572. For sparse traversals, where only certain node types are of interest, use
  1573. subclass `SparseNodeVisitor` instead. When (mostly or entirely) uniform
  1574. processing is desired, subclass `GenericNodeVisitor`.
  1575. .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
  1576. Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
  1577. 1995.
  1578. """
  1579. optional = ('meta',)
  1580. """
  1581. Tuple containing node class names (as strings).
  1582. No exception will be raised if writers do not implement visit
  1583. or departure functions for these node classes.
  1584. Used to ensure transitional compatibility with existing 3rd-party writers.
  1585. """
  1586. def __init__(self, document):
  1587. self.document = document
  1588. def dispatch_visit(self, node):
  1589. """
  1590. Call self."``visit_`` + node class name" with `node` as
  1591. parameter. If the ``visit_...`` method does not exist, call
  1592. self.unknown_visit.
  1593. """
  1594. node_name = node.__class__.__name__
  1595. method = getattr(self, 'visit_' + node_name, self.unknown_visit)
  1596. self.document.reporter.debug(
  1597. 'docutils.nodes.NodeVisitor.dispatch_visit calling %s for %s'
  1598. % (method.__name__, node_name))
  1599. return method(node)
  1600. def dispatch_departure(self, node):
  1601. """
  1602. Call self."``depart_`` + node class name" with `node` as
  1603. parameter. If the ``depart_...`` method does not exist, call
  1604. self.unknown_departure.
  1605. """
  1606. node_name = node.__class__.__name__
  1607. method = getattr(self, 'depart_' + node_name, self.unknown_departure)
  1608. self.document.reporter.debug(
  1609. 'docutils.nodes.NodeVisitor.dispatch_departure calling %s for %s'
  1610. % (method.__name__, node_name))
  1611. return method(node)
  1612. def unknown_visit(self, node):
  1613. """
  1614. Called when entering unknown `Node` types.
  1615. Raise an exception unless overridden.
  1616. """
  1617. if (self.document.settings.strict_visitor
  1618. or node.__class__.__name__ not in self.optional):
  1619. raise NotImplementedError(
  1620. '%s visiting unknown node type: %s'
  1621. % (self.__class__, node.__class__.__name__))
  1622. def unknown_departure(self, node):
  1623. """
  1624. Called before exiting unknown `Node` types.
  1625. Raise exception unless overridden.
  1626. """
  1627. if (self.document.settings.strict_visitor
  1628. or node.__class__.__name__ not in self.optional):
  1629. raise NotImplementedError(
  1630. '%s departing unknown node type: %s'
  1631. % (self.__class__, node.__class__.__name__))
class SparseNodeVisitor(NodeVisitor):
    """
    Base class for sparse traversals, where only certain node types are of
    interest. When ``visit_...`` & ``depart_...`` methods should be
    implemented for *all* node types (such as for `docutils.writers.Writer`
    subclasses), subclass `NodeVisitor` instead.

    The ``visit_...`` and ``depart_...`` methods for every name in
    `node_class_names` are attached to this class as no-ops by
    `_add_node_class_names()` at module level.
    """
class GenericNodeVisitor(NodeVisitor):
    """
    Generic "Visitor" abstract superclass, for simple traversals.

    Unless overridden, each ``visit_...`` method calls `default_visit()`, and
    each ``depart_...`` method (when using `Node.walkabout()`) calls
    `default_departure()`. `default_visit()` (and `default_departure()`) must
    be overridden in subclasses.

    Define fully generic visitors by overriding `default_visit()` (and
    `default_departure()`) only. Define semi-generic visitors by overriding
    individual ``visit_...()`` (and ``depart_...()``) methods also.

    `NodeVisitor.unknown_visit()` (`NodeVisitor.unknown_departure()`) should
    be overridden for default behavior.

    The per-node-class ``visit_...``/``depart_...`` methods are attached to
    this class by `_add_node_class_names()` at module level.
    """

    def default_visit(self, node):
        """Override for generic, uniform traversals.

        Raises `NotImplementedError` unless overridden.
        """
        raise NotImplementedError

    def default_departure(self, node):
        """Override for generic, uniform traversals.

        Raises `NotImplementedError` unless overridden.
        """
        raise NotImplementedError
  1658. def _call_default_visit(self, node):
  1659. self.default_visit(node)
  1660. def _call_default_departure(self, node):
  1661. self.default_departure(node)
  1662. def _nop(self, node):
  1663. pass
  1664. def _add_node_class_names(names):
  1665. """Save typing with dynamic assignments:"""
  1666. for _name in names:
  1667. setattr(GenericNodeVisitor, "visit_" + _name, _call_default_visit)
  1668. setattr(GenericNodeVisitor, "depart_" + _name, _call_default_departure)
  1669. setattr(SparseNodeVisitor, 'visit_' + _name, _nop)
  1670. setattr(SparseNodeVisitor, 'depart_' + _name, _nop)
# Runs at import time: installs visit_/depart_ handlers for every name in
# `node_class_names` on GenericNodeVisitor and SparseNodeVisitor.
_add_node_class_names(node_class_names)
  1672. class TreeCopyVisitor(GenericNodeVisitor):
  1673. """
  1674. Make a complete copy of a tree or branch, including element attributes.
  1675. """
  1676. def __init__(self, document):
  1677. GenericNodeVisitor.__init__(self, document)
  1678. self.parent_stack = []
  1679. self.parent = []
  1680. def get_tree_copy(self):
  1681. return self.parent[0]
  1682. def default_visit(self, node):
  1683. """Copy the current node, and make it the new acting parent."""
  1684. newnode = node.copy()
  1685. self.parent.append(newnode)
  1686. self.parent_stack.append(self.parent)
  1687. self.parent = newnode
  1688. def default_departure(self, node):
  1689. """Restore the previous acting parent."""
  1690. self.parent = self.parent_stack.pop()
class TreePruningException(Exception):
    """
    Base class for `NodeVisitor`-related tree pruning exceptions.

    Raise subclasses from within ``visit_...`` or ``depart_...`` methods
    called from `Node.walk()` and `Node.walkabout()` tree traversals to prune
    the tree traversed.
    """


class SkipChildren(TreePruningException):
    """
    Do not visit any children of the current node. The current node's
    siblings and ``depart_...`` method are not affected.
    """


class SkipSiblings(TreePruningException):
    """
    Do not visit any more siblings (to the right) of the current node. The
    current node's children and its ``depart_...`` method are not affected.
    """


class SkipNode(TreePruningException):
    """
    Do not visit the current node's children, and do not call the current
    node's ``depart_...`` method.
    """


class SkipDeparture(TreePruningException):
    """
    Do not call the current node's ``depart_...`` method. The current node's
    children and siblings are not affected.
    """


class NodeFound(TreePruningException):
    """
    Raise to indicate that the target of a search has been found. This
    exception must be caught by the client; it is not caught by the traversal
    code.
    """


class StopTraversal(TreePruningException):
    """
    Stop the traversal altogether. The current node's ``depart_...`` method
    is not affected. The parent nodes' ``depart_...`` methods are also called
    as usual. No other nodes are visited. This is an alternative to
    NodeFound that does not cause exception handling to trickle up to the
    caller.
    """
  1732. def make_id(string):
  1733. """
  1734. Convert `string` into an identifier and return it.
  1735. Docutils identifiers will conform to the regular expression
  1736. ``[a-z](-?[a-z0-9]+)*``. For CSS compatibility, identifiers (the "class"
  1737. and "id" attributes) should have no underscores, colons, or periods.
  1738. Hyphens may be used.
  1739. - The `HTML 4.01 spec`_ defines identifiers based on SGML tokens:
  1740. ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
  1741. followed by any number of letters, digits ([0-9]), hyphens ("-"),
  1742. underscores ("_"), colons (":"), and periods (".").
  1743. - However the `CSS1 spec`_ defines identifiers based on the "name" token,
  1744. a tighter interpretation ("flex" tokenizer notation; "latin1" and
  1745. "escape" 8-bit characters have been replaced with entities)::
  1746. unicode \\[0-9a-f]{1,4}
  1747. latin1 [&iexcl;-&yuml;]
  1748. escape {unicode}|\\[ -~&iexcl;-&yuml;]
  1749. nmchar [-a-z0-9]|{latin1}|{escape}
  1750. name {nmchar}+
  1751. The CSS1 "nmchar" rule does not include underscores ("_"), colons (":"),
  1752. or periods ("."), therefore "class" and "id" attributes should not contain
  1753. these characters. They should be replaced with hyphens ("-"). Combined
  1754. with HTML's requirements (the first character must be a letter; no
  1755. "unicode", "latin1", or "escape" characters), this results in the
  1756. ``[a-z](-?[a-z0-9]+)*`` pattern.
  1757. .. _HTML 4.01 spec: https://www.w3.org/TR/html401
  1758. .. _CSS1 spec: https://www.w3.org/TR/REC-CSS1
  1759. """
  1760. id = string.lower()
  1761. id = id.translate(_non_id_translate_digraphs)
  1762. id = id.translate(_non_id_translate)
  1763. # get rid of non-ascii characters.
  1764. # 'ascii' lowercase to prevent problems with turkish locale.
  1765. id = unicodedata.normalize(
  1766. 'NFKD', id).encode('ascii', 'ignore').decode('ascii')
  1767. # shrink runs of whitespace and replace by hyphen
  1768. id = _non_id_chars.sub('-', ' '.join(id.split()))
  1769. id = _non_id_at_ends.sub('', id)
  1770. return str(id)
  1771. _non_id_chars = re.compile('[^a-z0-9]+')
  1772. _non_id_at_ends = re.compile('^[-0-9]+|-+$')
# Translation table for `str.translate()`, applied in `make_id()` after the
# digraph table: maps Unicode code points to one-letter ASCII replacements.
_non_id_translate = {
    0x00f8: 'o',       # o with stroke
    0x0111: 'd',       # d with stroke
    0x0127: 'h',       # h with stroke
    0x0131: 'i',       # dotless i
    0x0142: 'l',       # l with stroke
    0x0167: 't',       # t with stroke
    0x0180: 'b',       # b with stroke
    0x0183: 'b',       # b with topbar
    0x0188: 'c',       # c with hook
    0x018c: 'd',       # d with topbar
    0x0192: 'f',       # f with hook
    0x0199: 'k',       # k with hook
    0x019a: 'l',       # l with bar
    0x019e: 'n',       # n with long right leg
    0x01a5: 'p',       # p with hook
    0x01ab: 't',       # t with palatal hook
    0x01ad: 't',       # t with hook
    0x01b4: 'y',       # y with hook
    0x01b6: 'z',       # z with stroke
    0x01e5: 'g',       # g with stroke
    0x0225: 'z',       # z with hook
    0x0234: 'l',       # l with curl
    0x0235: 'n',       # n with curl
    0x0236: 't',       # t with curl
    0x0237: 'j',       # dotless j
    0x023c: 'c',       # c with stroke
    0x023f: 's',       # s with swash tail
    0x0240: 'z',       # z with swash tail
    0x0247: 'e',       # e with stroke
    0x0249: 'j',       # j with stroke
    0x024b: 'q',       # q with hook tail
    0x024d: 'r',       # r with stroke
    0x024f: 'y',       # y with stroke
}
# Translation table for `str.translate()`, applied first in `make_id()`:
# maps Unicode code points to two-letter ASCII replacements.
_non_id_translate_digraphs = {
    0x00df: 'sz',      # ligature sz
    0x00e6: 'ae',      # ae
    0x0153: 'oe',      # ligature oe
    0x0238: 'db',      # db digraph
    0x0239: 'qp',      # qp digraph
}
  1815. def dupname(node, name):
  1816. node['dupnames'].append(name)
  1817. node['names'].remove(name)
  1818. # Assume that this method is referenced, even though it isn't; we
  1819. # don't want to throw unnecessary system_messages.
  1820. node.referenced = 1
  1821. def fully_normalize_name(name):
  1822. """Return a case- and whitespace-normalized name."""
  1823. return ' '.join(name.lower().split())
  1824. def whitespace_normalize_name(name):
  1825. """Return a whitespace-normalized name."""
  1826. return ' '.join(name.split())
  1827. def serial_escape(value):
  1828. """Escape string values that are elements of a list, for serialization."""
  1829. return value.replace('\\', r'\\').replace(' ', r'\ ')
  1830. def pseudo_quoteattr(value):
  1831. """Quote attributes for pseudo-xml"""
  1832. return '"%s"' % value