_html_base.py 69 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781
  1. #!/usr/bin/env python3
  2. # :Author: David Goodger, Günter Milde
  3. # Based on the html4css1 writer by David Goodger.
  4. # :Maintainer: docutils-develop@lists.sourceforge.net
  5. # :Revision: $Revision: 9081 $
  6. # :Date: $Date: 2005-06-28$
  7. # :Copyright: © 2016 David Goodger, Günter Milde
  8. # :License: Released under the terms of the `2-Clause BSD license`_, in short:
  9. #
  10. # Copying and distribution of this file, with or without modification,
  11. # are permitted in any medium without royalty provided the copyright
  12. # notice and this notice are preserved.
  13. # This file is offered as-is, without any warranty.
  14. #
  15. # .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause
  16. """common definitions for Docutils HTML writers"""
  17. import base64
  18. import mimetypes
  19. import os
  20. import os.path
  21. import re
  22. from urllib.request import url2pathname
  23. import warnings
  24. import docutils
  25. from docutils import frontend, languages, nodes, utils, writers
  26. from docutils.parsers.rst.directives import length_or_percentage_or_unitless
  27. from docutils.parsers.rst.directives.images import PIL
  28. from docutils.transforms import writer_aux
  29. from docutils.utils.math import (unichar2tex, pick_math_environment,
  30. math2html, latex2mathml, tex2mathml_extern)
  class Writer(writers.Writer):
      """Common base class of the Docutils HTML writers.

      Declares the settings shared by the HTML writers and builds the
      output by interpolating the document parts collected by the
      translator into a template file.

      `self.translator_class` is used by `translate()` but is not defined
      in this class; presumably subclasses provide it.
      """

      supported = ('html', 'xhtml')  # update in subclass
      """Formats this writer supports."""

      settings_spec = (
          'HTML Writer Options',
          None,
          (('Specify the template file (UTF-8 encoded). '
            '(default: writer dependent)',
            ['--template'],
            {'metavar': '<file>'}),
           ('Comma separated list of stylesheet URLs. '
            'Overrides previous --stylesheet and --stylesheet-path settings.',
            ['--stylesheet'],
            {'metavar': '<URL[,URL,...]>', 'overrides': 'stylesheet_path',
             'validator': frontend.validate_comma_separated_list}),
           ('Comma separated list of stylesheet paths. '
            'Relative paths are expanded if a matching file is found in '
            'the --stylesheet-dirs. With --link-stylesheet, '
            'the path is rewritten relative to the output HTML file. '
            '(default: writer dependent)',
            ['--stylesheet-path'],
            {'metavar': '<file[,file,...]>', 'overrides': 'stylesheet',
             'validator': frontend.validate_comma_separated_list}),
           ('Comma-separated list of directories where stylesheets are found. '
            'Used by --stylesheet-path when expanding relative path arguments. '
            '(default: writer dependent)',
            ['--stylesheet-dirs'],
            {'metavar': '<dir[,dir,...]>',
             'validator': frontend.validate_comma_separated_list}),
           ('Embed the stylesheet(s) in the output HTML file. The stylesheet '
            'files must be accessible during processing. (default)',
            ['--embed-stylesheet'],
            {'default': 1, 'action': 'store_true',
             'validator': frontend.validate_boolean}),
           ('Link to the stylesheet(s) in the output HTML file. ',
            ['--link-stylesheet'],
            {'dest': 'embed_stylesheet', 'action': 'store_false'}),
           # Fix: the second literal now ends with a space, so the help
           # text no longer reads "...--no-doc-title).(default: ...".
           ('Specify the initial header level. '
            'Does not affect document title & subtitle (see --no-doc-title). '
            '(default: writer dependent).',
            ['--initial-header-level'],
            {'choices': '1 2 3 4 5 6'.split(), 'default': '2',
             'metavar': '<level>'}),
           ('Format for footnote references: one of "superscript" or '
            '"brackets". (default: "brackets")',
            ['--footnote-references'],
            {'choices': ['superscript', 'brackets'], 'default': 'brackets',
             'metavar': '<format>',
             'overrides': 'trim_footnote_reference_space'}),
           ('Format for block quote attributions: '
            'one of "dash" (em-dash prefix), "parentheses"/"parens", or "none". '
            '(default: "dash")',
            ['--attribution'],
            {'choices': ['dash', 'parentheses', 'parens', 'none'],
             'default': 'dash', 'metavar': '<format>'}),
           ('Remove extra vertical whitespace between items of "simple" bullet '
            'lists and enumerated lists. (default)',
            ['--compact-lists'],
            {'default': True, 'action': 'store_true',
             'validator': frontend.validate_boolean}),
           ('Disable compact simple bullet and enumerated lists.',
            ['--no-compact-lists'],
            {'dest': 'compact_lists', 'action': 'store_false'}),
           ('Remove extra vertical whitespace between items of simple field '
            'lists. (default)',
            ['--compact-field-lists'],
            {'default': True, 'action': 'store_true',
             'validator': frontend.validate_boolean}),
           ('Disable compact simple field lists.',
            ['--no-compact-field-lists'],
            {'dest': 'compact_field_lists', 'action': 'store_false'}),
           ('Added to standard table classes. '
            'Defined styles: borderless, booktabs, '
            'align-left, align-center, align-right, '
            'colwidths-auto, colwidths-grid.',
            ['--table-style'],
            {'default': ''}),
           ('Math output format (one of "MathML", "HTML", "MathJax", '
            'or "LaTeX") and option(s). '
            '(default: "HTML math.css")',
            ['--math-output'],
            {'default': 'HTML math.css'}),
           ('Prepend an XML declaration. ',
            ['--xml-declaration'],
            {'default': False, 'action': 'store_true',
             'validator': frontend.validate_boolean}),
           ('Omit the XML declaration.',
            ['--no-xml-declaration'],
            {'dest': 'xml_declaration', 'action': 'store_false'}),
           ('Obfuscate email addresses to confuse harvesters while still '
            'keeping email links usable with standards-compliant browsers.',
            ['--cloak-email-addresses'],
            {'action': 'store_true', 'validator': frontend.validate_boolean}),
           )
          )

      settings_defaults = {'output_encoding_error_handler': 'xmlcharrefreplace'}

      config_section = 'html base writer'  # overwrite in subclass
      config_section_dependencies = ('writers', 'html writers')

      # Names of the translator attributes that are copied to the writer
      # in `translate()` and exported by `interpolation_dict()` and
      # `assemble_parts()`.
      visitor_attributes = (
          'head_prefix', 'head', 'stylesheet', 'body_prefix',
          'body_pre_docinfo', 'docinfo', 'body', 'body_suffix',
          'title', 'subtitle', 'header', 'footer', 'meta', 'fragment',
          'html_prolog', 'html_head', 'html_title', 'html_subtitle',
          'html_body')

      def get_transforms(self):
          """Return the parent class' transforms plus `Admonitions`."""
          return writers.Writer.get_transforms(self) + [writer_aux.Admonitions]

      def translate(self):
          """Walk the document with the translator and store the output.

          Copies every attribute listed in `visitor_attributes` from the
          visitor to the writer, then sets `self.output` to the filled
          template.
          """
          self.visitor = visitor = self.translator_class(self.document)
          self.document.walkabout(visitor)
          for attr in self.visitor_attributes:
              setattr(self, attr, getattr(visitor, attr))
          self.output = self.apply_template()

      def apply_template(self):
          """Return the template file content filled with the document parts.

          The template named by the `template` setting is read as UTF-8 and
          interpolated with ``%`` using `interpolation_dict()`.
          """
          with open(self.document.settings.template, 'r',
                    encoding='utf-8') as template_file:
              template = template_file.read()
          subs = self.interpolation_dict()
          return template % subs

      def interpolation_dict(self):
          """Return the mapping used to fill the template.

          Every visitor attribute is joined into one string with trailing
          newlines removed; 'encoding' and 'version' are added.
          """
          settings = self.document.settings
          subs = {attr: ''.join(getattr(self, attr)).rstrip('\n')
                  for attr in self.visitor_attributes}
          subs['encoding'] = settings.output_encoding
          subs['version'] = docutils.__version__
          return subs

      def assemble_parts(self):
          """Extend `self.parts` with the joined visitor attributes."""
          writers.Writer.assemble_parts(self)
          for part in self.visitor_attributes:
              self.parts[part] = ''.join(getattr(self, part))
  161. class HTMLTranslator(nodes.NodeVisitor):
  162. """
  163. Generic Docutils to HTML translator.
  164. See the `html4css1` and `html5_polyglot` writers for full featured
  165. HTML writers.
  166. .. IMPORTANT::
  167. The `visit_*` and `depart_*` methods use a
  168. heterogeneous stack, `self.context`.
  169. When subclassing, make sure to be consistent in its use!
  170. Examples for robust coding:
  171. a) Override both `visit_*` and `depart_*` methods, don't call the
  172. parent functions.
  173. b) Extend both and unconditionally call the parent functions::
  174. def visit_example(self, node):
  175. if foo:
  176. self.body.append('<div class="foo">')
  177. html4css1.HTMLTranslator.visit_example(self, node)
  178. def depart_example(self, node):
  179. html4css1.HTMLTranslator.depart_example(self, node)
  180. if foo:
  181. self.body.append('</div>')
  182. c) Extend both, calling the parent functions under the same
  183. conditions::
  184. def visit_example(self, node):
  185. if foo:
  186. self.body.append('<div class="foo">\n')
  187. else: # call the parent method
  188. _html_base.HTMLTranslator.visit_example(self, node)
  189. def depart_example(self, node):
  190. if foo:
  191. self.body.append('</div>\n')
  192. else: # call the parent method
  193. _html_base.HTMLTranslator.depart_example(self, node)
  194. d) Extend one method (call the parent), but don't otherwise use the
  195. `self.context` stack::
  196. def depart_example(self, node):
  197. _html_base.HTMLTranslator.depart_example(self, node)
  198. if foo:
  199. # implementation-specific code
  200. # that does not use `self.context`
  201. self.body.append('</div>\n')
  202. This way, changes in stack use will not bite you.
  203. """
  # Templates for the document prolog and the <head> element.
  xml_declaration = '<?xml version="1.0" encoding="%s" ?>\n'
  doctype = '<!DOCTYPE html>\n'
  doctype_mathml = doctype

  head_prefix_template = (
      '<html xmlns="http://www.w3.org/1999/xhtml"'
      ' xml:lang="%(lang)s" lang="%(lang)s">\n<head>\n'
  )
  content_type = '<meta charset="%s" />\n'
  generator = (
      '<meta name="generator" content="Docutils %s: '
      'https://docutils.sourceforge.io/" />\n'
  )

  # Template for the MathJax script in the header:
  mathjax_script = '<script type="text/javascript" src="%s"></script>\n'

  mathjax_url = 'file:/usr/share/javascript/mathjax/MathJax.js'
  """
  URL of the MathJax javascript library.

  The MathJax library ought to be installed on the same
  server as the rest of the deployed site files and specified
  in the `math-output` setting appended to "mathjax".
  See `Docutils Configuration`__.

  __ https://docutils.sourceforge.io/docs/user/config.html#math-output

  The fallback tries a local MathJax installation at
  ``/usr/share/javascript/mathjax/MathJax.js``.
  """

  # Templates for linked and embedded stylesheets.
  stylesheet_link = '<link rel="stylesheet" href="%s" type="text/css" />\n'
  embedded_stylesheet = '<style type="text/css">\n\n%s\n</style>\n'

  # Tokenizer: runs of non-blank characters, runs of spaces, single newlines.
  words_and_spaces = re.compile(r'[^ \n]+| +|\n')
  # wrap point inside word:
  in_word_wrap_point = re.compile(r'.+\W\W.+|[-?].+')

  lang_attribute = 'lang'  # name changes to 'xml:lang' in XHTML 1.1

  # `str.maketrans` converts the one-character keys to code points, which
  # is the key type `str.translate` expects.
  special_characters = str.maketrans({
      '&': '&amp;',
      '<': '&lt;',
      '"': '&quot;',
      '>': '&gt;',
      '@': '&#64;',  # may thwart address harvesters
  })
  """Character references for characters with a special meaning in HTML."""
  def __init__(self, document):
      """Initialize the translator state for `document`.

      Reads the document settings, prepares the lists that collect the
      output parts (head, stylesheet, body, ...) and resets the flags
      used by the `visit_*`/`depart_*` methods.
      """
      nodes.NodeVisitor.__init__(self, document)
      settings = document.settings
      self.settings = settings
      self.language = languages.get_language(settings.language_code,
                                             document.reporter)

      # --- document head ---
      self.meta = [self.generator % docutils.__version__]
      self.head_prefix = []
      self.html_prolog = []
      if settings.xml_declaration:
          declaration = self.xml_declaration % settings.output_encoding
          self.head_prefix.append(declaration)
          # self.content_type = ""
          # the prolog keeps the template: encoding not interpolated
          self.html_prolog.append(self.xml_declaration)
      self.head = self.meta[:]
      self.stylesheet = [self.stylesheet_call(path)
                         for path in utils.get_stylesheet_list(settings)]

      # --- document body ---
      self.body_prefix = ['</head>\n<body>\n']
      self.body_pre_docinfo = []  # document title, subtitle display
      self.docinfo = []  # author, date, etc.
      self.body = []
      self.fragment = []
      self.body_suffix = ['</body>\n</html>\n']
      self.section_level = 0
      self.initial_header_level = int(settings.initial_header_level)

      # image_loading only defined for HTML5 writer
      self.image_loading = getattr(settings, 'image_loading', None)
      # Legacy setting embed_images: only an explicit True/False triggers
      # the deprecation warning and the fallback value.
      # NOTE(review): the nesting of the fallback under the warning was
      # reconstructed from a source without indentation -- confirm.
      legacy_embed = getattr(settings, 'embed_images', None)
      if legacy_embed is True or legacy_embed is False:
          replacement = 'embed' if legacy_embed else 'link'
          warnings.warn('The configuration setting "embed_images" '
                        'will be removed in Docutils 2.0. '
                        f'Use "image_loading: {replacement}".',
                        FutureWarning, stacklevel=8)
          if self.image_loading is None:
              self.image_loading = replacement

      # First word of the setting is the format, the rest are options.
      math_words = settings.math_output.split()
      self.math_output_options = math_words[1:]
      self.math_output = math_words[0].lower()

      # Heterogeneous stack, used by visit_* and depart_* functions in
      # conjunction with the tree traversal.
      # Make sure that the pops correspond to the pushes.
      self.context = []

      self.colspecs = []
      self.compact_p = True
      self.compact_simple = False
      self.compact_field_list = False
      self.in_docinfo = False
      self.in_sidebar = False
      self.title = []
      self.subtitle = []
      self.header = []
      self.footer = []
      self.html_head = [self.content_type]  # charset not interpolated
      self.html_title = []
      self.html_subtitle = []
      self.html_body = []
      self.in_document_title = 0  # len(self.body) or 0
      self.in_mailto = False
      self.author_in_authors = False  # for html4css1
      self.math_header = []
  307. def astext(self):
  308. return ''.join(self.head_prefix + self.head
  309. + self.stylesheet + self.body_prefix
  310. + self.body_pre_docinfo + self.docinfo
  311. + self.body + self.body_suffix)
  312. def encode(self, text):
  313. """Encode special characters in `text` & return."""
  314. # Use only named entities known in both XML and HTML
  315. # other characters are automatically encoded "by number" if required.
  316. # @@@ A codec to do these and all other HTML entities would be nice.
  317. text = str(text)
  318. return text.translate(self.special_characters)
  def cloak_mailto(self, uri):
      """Return `uri` with every "@" written as the octet reference "%40".

      This is meant to hide a mailto: URL from address harvesters
      (cf. RFC 1738). Further cloaking with HTML entities is done in
      `attval`.
      """
      return '%40'.join(uri.split('@'))
  def cloak_email(self, addr):
      """Try to hide the link text of an email link from harvesters.

      At-signs and periods are wrapped in <span> tags. The text is
      expected to be encoded already, i.e. "@" is matched in its
      "&#64;" form.
      """
      spanned_at = '<span>&#64;</span>'.join(addr.split('&#64;'))
      return '<span>&#46;</span>'.join(spanned_at.split('.'))
  331. def attval(self, text,
  332. whitespace=re.compile('[\n\r\t\v\f]')):
  333. """Cleanse, HTML encode, and return attribute value text."""
  334. encoded = self.encode(whitespace.sub(' ', text))
  335. if self.in_mailto and self.settings.cloak_email_addresses:
  336. # Cloak at-signs ("%40") and periods with HTML entities.
  337. encoded = encoded.replace('%40', '&#37;&#52;&#48;')
  338. encoded = encoded.replace('.', '&#46;')
  339. return encoded
  def stylesheet_call(self, path, adjust_path=None):
      """Return code to reference or embed stylesheet file `path`.

      With the `embed_stylesheet` setting, the file content is returned
      wrapped in a <style> element. If the file cannot be read, an error
      is reported and a marker string with the message is returned.
      Otherwise a <link> element is returned; `adjust_path` (default:
      true if the `stylesheet_path` setting is set) rewrites `path`
      relative to the output destination.
      """
      settings = self.settings
      if adjust_path is None:
          adjust_path = bool(settings.stylesheet_path)

      if not settings.embed_stylesheet:
          # link to style file:
          if adjust_path:
              # rewrite path relative to output
              # (cf. config.html#stylesheet-path)
              path = utils.relative_path(settings._destination, path)
          return self.stylesheet_link % self.encode(path)

      try:
          css_source = docutils.io.FileInput(source_path=path,
                                             encoding='utf-8')
          content = css_source.read()
      except OSError as err:
          msg = f'Cannot embed stylesheet: {err}'
          self.document.reporter.error(msg)
          # NOTE(review): this marker is not a well-formed HTML comment
          # ("<!-- ... -->"); kept unchanged to preserve the output.
          return '<--- %s --->\n' % msg
      settings.record_dependencies.add(path)
      return self.embedded_stylesheet % content
  def starttag(self, node, tagname, suffix='\n', empty=False, **attributes):
      """
      Construct and return a start tag given a node (id & class attributes
      are extracted), tag name, and optional attributes.

      `suffix` is appended after the tag. With `empty` true, the tag is
      written in the self-closing form ("<tag ... />").

      Keyword `attributes` become HTML attributes (names are lowercased).
      Special cases: `classes` (list) and `class` (space separated string)
      are merged with the node's classes; classes starting with
      "language-" are moved to the language attribute; `ids` (list) is
      added to the node's IDs. The first ID becomes the "id" attribute,
      additional IDs are rendered as empty <span> elements.

      Neither `node` nor the lists passed as `classes`/`ids` are modified.
      """
      tagname = tagname.lower()
      prefix = []
      atts = {name.lower(): value for name, value in attributes.items()}
      # Copy, so that appending below does not modify the caller's list.
      classes = list(atts.pop('classes', []))
      lang_codes = []
      # unify class arguments and move language specification
      for cls in node.get('classes', []) + atts.pop('class', '').split():
          if cls.startswith('language-'):
              lang_codes.append(cls[9:])
          elif cls.strip() and cls not in classes:
              classes.append(cls)
      if lang_codes:
          # attribute name is 'lang' in XHTML 1.0 but 'xml:lang' in 1.1
          atts[self.lang_attribute] = lang_codes[0]
      # filter classes that are processed by the writer:
      internal = ('colwidths-auto', 'colwidths-given', 'colwidths-grid')
      if isinstance(node, nodes.table):
          classes = [cls for cls in classes if cls not in internal]
      if classes:
          atts['class'] = ' '.join(classes)
      assert 'id' not in atts
      # Build a new list instead of extending the node's own "ids" list
      # (the latter would add the extra IDs to the document tree and
      # duplicate them if the tag is generated more than once).
      ids = list(node.get('ids', [])) + list(atts.pop('ids', []))
      if ids:
          atts['id'] = ids[0]
          # Add empty "span" elements for additional IDs. Note
          # that we cannot use empty "a" elements because there
          # may be targets inside of references, but nested "a"
          # elements aren't allowed in XHTML (even if they do
          # not all have a "href" attribute).
          target_in_front = empty or isinstance(
              node, (nodes.Sequential, nodes.docinfo, nodes.table))
          for extra_id in ids[1:]:
              anchor = '<span id="%s"></span>' % extra_id
              if target_in_front:
                  # Insert target right in front of element.
                  prefix.append(anchor)
              else:
                  # Non-empty tag. Place the auxiliary <span> tag
                  # *inside* the element, as the first child.
                  suffix += anchor
      parts = [tagname]
      for name, value in sorted(atts.items()):
          # value=None was used for boolean attributes without
          # value, but this isn't supported by XHTML.
          assert value is not None
          if isinstance(value, list):
              value = ' '.join(str(v) for v in value)
          parts.append('%s="%s"' % (name.lower(), self.attval(str(value))))
      infix = ' /' if empty else ''
      return ''.join(prefix) + '<%s%s>' % (' '.join(parts), infix) + suffix
  def emptytag(self, node, tagname, suffix='\n', **attributes):
      """Construct and return an XML-compatible empty tag.

      Delegates to `starttag` with ``empty=True``.
      """
      tag = self.starttag(node, tagname, suffix, empty=True, **attributes)
      return tag
  def set_class_on_child(self, node, class_, index=0):
      """
      Append `class_` to the classes of a visible child of `node`.

      `index` counts only children that are not `nodes.Invisible`.
      Nothing happens if there is no visible child at `index`.
      """
      visible = [child for child in node
                 if not isinstance(child, nodes.Invisible)]
      try:
          target = visible[index]
      except IndexError:
          return
      target['classes'].append(class_)
  def visit_Text(self, node):
      """Append the encoded text of `node` to the body.

      Inside mailto links the text is additionally cloaked if the
      `cloak_email_addresses` setting is on.
      """
      encoded = self.encode(node.astext())
      cloak = self.in_mailto and self.settings.cloak_email_addresses
      self.body.append(self.cloak_email(encoded) if cloak else encoded)
  def depart_Text(self, node):
      """Do nothing: text nodes are written completely in `visit_Text`."""
      return None
  def visit_abbreviation(self, node):
      """Open an <abbr> element."""
      # @@@ implementation incomplete ("title" attribute)
      opening = self.starttag(node, 'abbr', '')
      self.body.append(opening)
  450. def depart_abbreviation(self, node):
  451. self.body.append('</abbr>')
  def visit_acronym(self, node):
      """Open an <acronym> element."""
      # @@@ implementation incomplete ("title" attribute)
      opening = self.starttag(node, 'acronym', '')
      self.body.append(opening)
  455. def depart_acronym(self, node):
  456. self.body.append('</acronym>')
  def visit_address(self, node):
      """Start the "address" docinfo item and open its <pre> element."""
      self.visit_docinfo_item(node, 'address', meta=False)
      opening = self.starttag(node, 'pre', suffix='', CLASS='address')
      self.body.append(opening)
  def depart_address(self, node):
      """Close the <pre> element and finish the docinfo item."""
      closing = '\n</pre>\n'
      self.body.append(closing)
      self.depart_docinfo_item()
  def visit_admonition(self, node):
      """Open an <aside> element with class "admonition"."""
      opening = self.starttag(node, 'aside', classes=['admonition'])
      self.body.append(opening)
  466. def depart_admonition(self, node=None):
  467. self.body.append('</aside>\n')
  # (prefix, suffix) pairs for block quote attributions, keyed by the
  # value of the "attribution" setting.
  attribution_formats = dict(
      dash=('\u2014', ''),
      parentheses=('(', ')'),
      parens=('(', ')'),
      none=('', ''),
  )
  def visit_attribution(self, node):
      """Open the attribution paragraph.

      The prefix selected by the `attribution` setting is written after
      the start tag; the matching suffix is pushed on `self.context`
      for `depart_attribution`.
      """
      style = self.settings.attribution
      prefix, suffix = self.attribution_formats[style]
      self.context.append(suffix)
      opening = self.starttag(node, 'p', prefix, CLASS='attribution')
      self.body.append(opening)
  477. def depart_attribution(self, node):
  478. self.body.append(self.context.pop() + '</p>\n')
  def visit_author(self, node):
      """Open a paragraph for an author.

      An author that is not a child of an `authors` node is a docinfo
      item of its own and starts one first.
      """
      inside_authors = isinstance(node.parent, nodes.authors)
      if not inside_authors:
          self.visit_docinfo_item(node, 'author')
      self.body.append('<p>')
  def depart_author(self, node):
      """Close the author paragraph.

      Inside an `authors` node only a newline follows; otherwise the
      docinfo item started in `visit_author` is finished.
      """
      self.body.append('</p>')
      if isinstance(node.parent, nodes.authors):
          self.body.append('\n')
          return
      self.depart_docinfo_item()
  def visit_authors(self, node):
      """Start the "authors" docinfo item."""
      item_name = 'authors'
      self.visit_docinfo_item(node, item_name)
  def depart_authors(self, node):
      """Finish the "authors" docinfo item; `node` is not used."""
      self.depart_docinfo_item()
  def visit_block_quote(self, node):
      """Open a <blockquote> element."""
      opening = self.starttag(node, 'blockquote')
      self.body.append(opening)
  495. def depart_block_quote(self, node):
  496. self.body.append('</blockquote>\n')
  def check_simple_list(self, node):
      """Return True if `node` is a simple list that can be rendered compactly.

      The subtree is walked with a `SimpleListChecker`; a `NodeFound`
      exception raised during the walk means "not simple".
      """
      checker = SimpleListChecker(self.document)
      try:
          node.walk(checker)
      except nodes.NodeFound:
          return False
      return True
  506. # Compact lists
  507. # ------------
  508. # Include definition lists and field lists (in addition to ordered
  509. # and unordered lists) in the test if a list is "simple" (cf. the
  510. # html4css1.HTMLTranslator docstring and the SimpleListChecker class at
  511. # the end of this file).
  def is_compactable(self, node):
      """Return True if the list `node` is to be rendered compactly.

      Order of precedence: explicit "compact"/"open" class arguments,
      the `compact_field_lists`/`compact_lists` settings, a parent with
      class "contents" (table of contents), and finally the result of
      `check_simple_list`.
      """
      # explicit class arguments have precedence
      classes = node['classes']
      if 'compact' in classes:
          return True
      if 'open' in classes:
          return False
      # check config setting:
      is_field_like = isinstance(node, (nodes.field_list,
                                        nodes.definition_list))
      if is_field_like and not self.settings.compact_field_lists:
          return False
      is_plain_list = isinstance(node, (nodes.enumerated_list,
                                        nodes.bullet_list))
      if is_plain_list and not self.settings.compact_lists:
          return False
      # Table of Contents:
      if 'contents' in node.parent['classes']:
          return True
      # check the list items:
      return self.check_simple_list(node)
  def visit_bullet_list(self, node):
      """Open a <ul> element.

      The current compact state is saved on `self.context`. The class
      "simple" is added only where the compact state switches on, i.e.
      not for compact lists nested in a compact list.
      """
      outer_compact = self.compact_simple
      self.context.append((outer_compact, self.compact_p))
      self.compact_p = None
      self.compact_simple = self.is_compactable(node)
      if self.compact_simple and not outer_compact:
          atts = {'class': 'simple'}
      else:
          atts = {}
      self.body.append(self.starttag(node, 'ul', **atts))
  539. def depart_bullet_list(self, node):
  540. self.compact_simple, self.compact_p = self.context.pop()
  541. self.body.append('</ul>\n')
  def visit_caption(self, node):
      """Open a <p> element with class "caption"."""
      opening = self.starttag(node, 'p', '', CLASS='caption')
      self.body.append(opening)
  544. def depart_caption(self, node):
  545. self.body.append('</p>\n')
  546. # Use semantic tag and DPub role (HTML4 uses a table)
  def visit_citation(self, node):
      """Open a <div> with role "doc-biblioentry" for the citation.

      The first citation in a row of citations also opens the wrapping
      <div role="list">.
      """
      # role 'doc-biblioentry' requires wrapping in an element with
      # role 'list' and an element with role 'doc-bibliography'
      # (https://www.w3.org/TR/dpub-aria-1.0/#doc-biblioentry)
      follows_citation = isinstance(node.previous_sibling(), type(node))
      if not follows_citation:
          self.body.append('<div role="list" class="citation-list">\n')
      opening = self.starttag(node, 'div', classes=[node.tagname],
                              role="doc-biblioentry")
      self.body.append(opening)
  def depart_citation(self, node):
      """Close the citation <div>.

      If the next sibling is not a citation, the wrapping list <div>
      is closed, too.
      """
      self.body.append('</div>\n')
      successor = node.next_node(descend=False, siblings=True)
      if not isinstance(successor, type(node)):
          self.body.append('</div>\n')
  560. # Use DPub role (overwritten in HTML4)
  def visit_citation_reference(self, node):
      """Open a link to the citation, followed by "[".

      The link target is the node's "refid" or, failing that, the ID
      registered in `self.document.nameids` for its "refname". Without
      either, the href is just "#".
      """
      if 'refid' in node:
          fragment = node['refid']
      elif 'refname' in node:
          fragment = self.document.nameids[node['refname']]
      else:
          # TODO system message (or already in the transform)?
          # 'Citation reference missing.'
          fragment = ''
      href = '#' + fragment
      opening = self.starttag(node, 'a', suffix='[', href=href,
                              classes=['citation-reference'],
                              role='doc-biblioref')
      self.body.append(opening)
  572. def depart_citation_reference(self, node):
  573. self.body.append(']</a>')
  574. # classifier
  575. # ----------
  576. # don't insert classifier-delimiter here (done by CSS)
  def visit_classifier(self, node):
      """Open a <span> element with class "classifier"."""
      opening = self.starttag(node, 'span', '', CLASS='classifier')
      self.body.append(opening)
  579. def depart_classifier(self, node):
  580. self.body.append('</span>')
  def visit_colspec(self, node):
      """Collect the column specification in `self.colspecs`.

      The node's "stub" attribute (None if missing) is appended to the
      `stubs` list of its parent.
      """
      self.colspecs.append(node)
      # "stubs" list is an attribute of the tgroup element:
      stub = node.attributes.get('stub')
      node.parent.stubs.append(stub)
  def depart_colspec(self, node):
      """Write the <colgroup> element after the last colspec.

      Nothing is written while more colspecs follow, for tables with
      class "colwidths-auto", or if neither the `table_style` setting
      contains "colwidths-grid" nor the table has class
      "colwidths-given". Column widths are written as percentage of the
      sum of all "colwidth" values.
      """
      # write out <colgroup> when all colspecs are processed
      successor = node.next_node(descend=False, siblings=True)
      if isinstance(successor, nodes.colspec):
          return
      table_classes = node.parent.parent['classes']
      if 'colwidths-auto' in table_classes:
          return
      if ('colwidths-grid' not in self.settings.table_style
              and 'colwidths-given' not in table_classes):
          return
      self.body.append(self.starttag(node, 'colgroup'))
      total_width = sum(spec['colwidth'] for spec in self.colspecs)
      for spec in self.colspecs:
          share = spec['colwidth'] / total_width
          col_tag = self.emptytag(spec, 'col', style=f'width: {share:.1%}')
          self.body.append(col_tag)
      self.body.append('</colgroup>\n')
  601. def visit_comment(self, node,
  602. sub=re.compile('-(?=-)').sub):
  603. """Escape double-dashes in comment text."""
  604. self.body.append('<!-- %s -->\n' % sub('- ', node.astext()))
  605. # Content already processed:
  606. raise nodes.SkipNode
  607. def visit_compound(self, node):
  608. self.body.append(self.starttag(node, 'div', CLASS='compound'))
  609. def depart_compound(self, node):
  610. self.body.append('</div>\n')
  611. def visit_container(self, node):
  612. self.body.append(self.starttag(node, 'div',
  613. CLASS='docutils container'))
  614. def depart_container(self, node):
  615. self.body.append('</div>\n')
  616. def visit_contact(self, node):
  617. self.visit_docinfo_item(node, 'contact', meta=False)
  618. def depart_contact(self, node):
  619. self.depart_docinfo_item()
  620. def visit_copyright(self, node):
  621. self.visit_docinfo_item(node, 'copyright')
  622. def depart_copyright(self, node):
  623. self.depart_docinfo_item()
  624. def visit_date(self, node):
  625. self.visit_docinfo_item(node, 'date')
  626. def depart_date(self, node):
  627. self.depart_docinfo_item()
  628. def visit_decoration(self, node):
  629. pass
  630. def depart_decoration(self, node):
  631. pass
  632. def visit_definition(self, node):
  633. if "details" in node.parent.parent['classes']:
  634. self.body.append('</summary>\n')
  635. else:
  636. self.body.append('</dt>\n')
  637. self.body.append(self.starttag(node, 'dd', ''))
  638. def depart_definition(self, node):
  639. if "details" not in node.parent.parent['classes']:
  640. self.body.append('</dd>\n')
  641. def visit_definition_list(self, node):
  642. if "details" not in node['classes']:
  643. classes = ['simple'] if self.is_compactable(node) else []
  644. self.body.append(self.starttag(node, 'dl', classes=classes))
  645. def depart_definition_list(self, node):
  646. if "details" not in node['classes']:
  647. self.body.append('</dl>\n')
  648. # Use a "details" disclosure element if parent has "class" arg "details".
  649. def visit_definition_list_item(self, node):
  650. if "details" in node.parent['classes']:
  651. atts = {}
  652. if "open" in node.parent['classes']:
  653. atts['open'] = 'open'
  654. self.body.append(self.starttag(node, 'details', **atts))
  655. def depart_definition_list_item(self, node):
  656. if "details" in node.parent['classes']:
  657. self.body.append('</details>\n')
  658. def visit_description(self, node):
  659. self.body.append(self.starttag(node, 'dd', ''))
  660. def depart_description(self, node):
  661. self.body.append('</dd>\n')
  662. def visit_docinfo(self, node):
  663. self.context.append(len(self.body))
  664. classes = ['docinfo']
  665. if self.is_compactable(node):
  666. classes.append('simple')
  667. self.body.append(self.starttag(node, 'dl', classes=classes))
  668. def depart_docinfo(self, node):
  669. self.body.append('</dl>\n')
  670. start = self.context.pop()
  671. self.docinfo = self.body[start:]
  672. self.body = []
  673. def visit_docinfo_item(self, node, name, meta=True):
  674. if meta:
  675. meta_tag = '<meta name="%s" content="%s" />\n' \
  676. % (name, self.attval(node.astext()))
  677. self.add_meta(meta_tag)
  678. self.body.append(
  679. '<dt class="%s">%s<span class="colon">:</span></dt>\n'
  680. % (name, self.language.labels[name]))
  681. self.body.append(self.starttag(node, 'dd', '', CLASS=name))
  682. def depart_docinfo_item(self):
  683. self.body.append('</dd>\n')
  684. def visit_doctest_block(self, node):
  685. self.body.append(self.starttag(node, 'pre', suffix='',
  686. classes=['code', 'python', 'doctest']))
  687. def depart_doctest_block(self, node):
  688. self.body.append('\n</pre>\n')
  689. def visit_document(self, node):
  690. title = (node.get('title', '') or os.path.basename(node['source'])
  691. or 'docutils document without title')
  692. self.head.append('<title>%s</title>\n' % self.encode(title))
  693. def depart_document(self, node):
  694. self.head_prefix.extend([self.doctype,
  695. self.head_prefix_template %
  696. {'lang': self.settings.language_code}])
  697. self.html_prolog.append(self.doctype)
  698. self.meta.insert(0, self.content_type % self.settings.output_encoding)
  699. self.head.insert(0, self.content_type % self.settings.output_encoding)
  700. if 'name="dcterms.' in ''.join(self.meta):
  701. self.head.append('<link rel="schema.dcterms"'
  702. 'href="http://purl.org/dc/terms/"/>')
  703. if self.math_header:
  704. if self.math_output == 'mathjax':
  705. self.head.extend(self.math_header)
  706. else:
  707. self.stylesheet.extend(self.math_header)
  708. # skip content-type meta tag with interpolated charset value:
  709. self.html_head.extend(self.head[1:])
  710. self.body_prefix.append(self.starttag(node, 'div', CLASS='document'))
  711. self.body_suffix.insert(0, '</div>\n')
  712. self.fragment.extend(self.body) # self.fragment is the "naked" body
  713. self.html_body.extend(self.body_prefix[1:] + self.body_pre_docinfo
  714. + self.docinfo + self.body
  715. + self.body_suffix[:-1])
  716. assert not self.context, 'len(context) = %s' % len(self.context)
  717. def visit_emphasis(self, node):
  718. self.body.append(self.starttag(node, 'em', ''))
  719. def depart_emphasis(self, node):
  720. self.body.append('</em>')
  721. def visit_entry(self, node):
  722. atts = {'classes': []}
  723. if isinstance(node.parent.parent, nodes.thead):
  724. atts['classes'].append('head')
  725. if node.parent.parent.parent.stubs[node.parent.column]:
  726. # "stubs" list is an attribute of the tgroup element
  727. atts['classes'].append('stub')
  728. if atts['classes']:
  729. tagname = 'th'
  730. else:
  731. tagname = 'td'
  732. node.parent.column += 1
  733. if 'morerows' in node:
  734. atts['rowspan'] = node['morerows'] + 1
  735. if 'morecols' in node:
  736. atts['colspan'] = node['morecols'] + 1
  737. node.parent.column += node['morecols']
  738. self.body.append(self.starttag(node, tagname, '', **atts))
  739. self.context.append('</%s>\n' % tagname.lower())
  740. def depart_entry(self, node):
  741. self.body.append(self.context.pop())
  742. def visit_enumerated_list(self, node):
  743. atts = {'classes': []}
  744. if 'start' in node:
  745. atts['start'] = node['start']
  746. if 'enumtype' in node:
  747. atts['classes'].append(node['enumtype'])
  748. if self.is_compactable(node):
  749. atts['classes'].append('simple')
  750. self.body.append(self.starttag(node, 'ol', **atts))
  751. def depart_enumerated_list(self, node):
  752. self.body.append('</ol>\n')
  753. def visit_field_list(self, node):
  754. atts = {}
  755. classes = node.setdefault('classes', [])
  756. for i, cls in enumerate(classes):
  757. if cls.startswith('field-indent-'):
  758. try:
  759. indent_length = length_or_percentage_or_unitless(
  760. cls[13:], 'px')
  761. except ValueError:
  762. break
  763. atts['style'] = '--field-indent: %s;' % indent_length
  764. classes.pop(i)
  765. break
  766. classes.append('field-list')
  767. if self.is_compactable(node):
  768. classes.append('simple')
  769. self.body.append(self.starttag(node, 'dl', **atts))
  770. def depart_field_list(self, node):
  771. self.body.append('</dl>\n')
  772. def visit_field(self, node):
  773. pass
  774. def depart_field(self, node):
  775. pass
  776. # as field is ignored, pass class arguments to field-name and field-body:
  777. def visit_field_name(self, node):
  778. self.body.append(self.starttag(node, 'dt', '',
  779. classes=node.parent['classes']))
  780. def depart_field_name(self, node):
  781. self.body.append('<span class="colon">:</span></dt>\n')
  782. def visit_field_body(self, node):
  783. self.body.append(self.starttag(node, 'dd', '',
  784. classes=node.parent['classes']))
  785. # prevent misalignment of following content if the field is empty:
  786. if not node.children:
  787. self.body.append('<p></p>')
  788. def depart_field_body(self, node):
  789. self.body.append('</dd>\n')
  790. def visit_figure(self, node):
  791. atts = {'class': 'figure'}
  792. if node.get('width'):
  793. atts['style'] = 'width: %s' % node['width']
  794. if node.get('align'):
  795. atts['class'] += " align-" + node['align']
  796. self.body.append(self.starttag(node, 'div', **atts))
  797. def depart_figure(self, node):
  798. self.body.append('</div>\n')
  799. def visit_footer(self, node):
  800. self.context.append(len(self.body))
  801. def depart_footer(self, node):
  802. start = self.context.pop()
  803. footer = [self.starttag(node, 'div', CLASS='footer'),
  804. '<hr class="footer" />\n']
  805. footer.extend(self.body[start:])
  806. footer.append('\n</div>\n')
  807. self.footer.extend(footer)
  808. self.body_suffix[:0] = footer
  809. del self.body[start:]
  810. def visit_footnote(self, node):
  811. # No native HTML element: use <aside> with ARIA role
  812. # (html4css1 uses tables).
  813. # Wrap groups of footnotes for easier styling.
  814. label_style = self.settings.footnote_references # brackets/superscript
  815. if not isinstance(node.previous_sibling(), type(node)):
  816. self.body.append(f'<aside class="footnote-list {label_style}">\n')
  817. self.body.append(self.starttag(node, 'aside',
  818. classes=[node.tagname, label_style],
  819. role="note"))
  820. def depart_footnote(self, node):
  821. self.body.append('</aside>\n')
  822. if not isinstance(node.next_node(descend=False, siblings=True),
  823. type(node)):
  824. self.body.append('</aside>\n')
  825. def visit_footnote_reference(self, node):
  826. href = '#' + node['refid']
  827. classes = ['footnote-reference', self.settings.footnote_references]
  828. self.body.append(self.starttag(node, 'a', suffix='', classes=classes,
  829. role='doc-noteref', href=href))
  830. self.body.append('<span class="fn-bracket">[</span>')
  831. def depart_footnote_reference(self, node):
  832. self.body.append('<span class="fn-bracket">]</span>')
  833. self.body.append('</a>')
  834. # Docutils-generated text: put section numbers in a span for CSS styling:
  835. def visit_generated(self, node):
  836. if 'sectnum' in node['classes']:
  837. # get section number (strip trailing no-break-spaces)
  838. sectnum = node.astext().rstrip(' ')
  839. self.body.append('<span class="sectnum">%s </span>'
  840. % self.encode(sectnum))
  841. # Content already processed:
  842. raise nodes.SkipNode
  843. def depart_generated(self, node):
  844. pass
  845. def visit_header(self, node):
  846. self.context.append(len(self.body))
  847. def depart_header(self, node):
  848. start = self.context.pop()
  849. header = [self.starttag(node, 'div', CLASS='header')]
  850. header.extend(self.body[start:])
  851. header.append('\n<hr class="header"/>\n</div>\n')
  852. self.body_prefix.extend(header)
  853. self.header.extend(header)
  854. del self.body[start:]
  855. def visit_image(self, node):
  856. atts = {}
  857. uri = node['uri']
  858. mimetype = mimetypes.guess_type(uri)[0]
  859. # image size
  860. if 'width' in node:
  861. atts['width'] = node['width']
  862. if 'height' in node:
  863. atts['height'] = node['height']
  864. if 'scale' in node:
  865. if (PIL and ('width' not in node or 'height' not in node)
  866. and self.settings.file_insertion_enabled):
  867. imagepath = url2pathname(uri)
  868. try:
  869. with PIL.Image.open(imagepath) as img:
  870. imgsize = img.size
  871. except (OSError, UnicodeEncodeError):
  872. pass # TODO: warn?
  873. else:
  874. self.settings.record_dependencies.add(
  875. imagepath.replace('\\', '/'))
  876. if 'width' not in atts:
  877. atts['width'] = '%dpx' % imgsize[0]
  878. if 'height' not in atts:
  879. atts['height'] = '%dpx' % imgsize[1]
  880. del img
  881. for att_name in 'width', 'height':
  882. if att_name in atts:
  883. match = re.match(r'([0-9.]+)(\S*)$', atts[att_name])
  884. assert match
  885. atts[att_name] = '%s%s' % (
  886. float(match.group(1)) * (float(node['scale']) / 100),
  887. match.group(2))
  888. style = []
  889. for att_name in 'width', 'height':
  890. if att_name in atts:
  891. if re.match(r'^[0-9.]+$', atts[att_name]):
  892. # Interpret unitless values as pixels.
  893. atts[att_name] += 'px'
  894. style.append('%s: %s;' % (att_name, atts[att_name]))
  895. del atts[att_name]
  896. if style:
  897. atts['style'] = ' '.join(style)
  898. if (isinstance(node.parent, nodes.TextElement)
  899. or (isinstance(node.parent, nodes.reference)
  900. and not isinstance(node.parent.parent, nodes.TextElement))):
  901. # Inline context or surrounded by <a>...</a>.
  902. suffix = ''
  903. else:
  904. suffix = '\n'
  905. if 'align' in node:
  906. atts['class'] = 'align-%s' % node['align']
  907. # Embed image file (embedded SVG or data URI):
  908. if self.image_loading == 'embed':
  909. try:
  910. with open(url2pathname(uri), 'rb') as imagefile:
  911. imagedata = imagefile.read()
  912. except OSError as err:
  913. self.document.reporter.error('Cannot embed image %r: %s'
  914. % (uri, err.strerror))
  915. else:
  916. self.settings.record_dependencies.add(
  917. uri.replace('\\', '/'))
  918. # TODO: insert SVG as-is?
  919. # if mimetype == 'image/svg+xml':
  920. # read/parse, apply arguments,
  921. # insert as <svg ....> ... </svg> # (about 1/3 less data)
  922. data64 = base64.b64encode(imagedata).decode()
  923. uri = 'data:%s;base64,%s' % (mimetype, data64)
  924. elif self.image_loading == 'lazy':
  925. atts['loading'] = 'lazy'
  926. if mimetype == 'application/x-shockwave-flash':
  927. atts['type'] = mimetype
  928. # do NOT use an empty tag: incorrect rendering in browsers
  929. tag = (self.starttag(node, 'object', '', data=uri, **atts)
  930. + node.get('alt', uri) + '</object>' + suffix)
  931. else:
  932. atts['alt'] = node.get('alt', node['uri'])
  933. tag = self.emptytag(node, 'img', suffix, src=uri, **atts)
  934. self.body.append(tag)
  935. def depart_image(self, node):
  936. pass
  937. def visit_inline(self, node):
  938. self.body.append(self.starttag(node, 'span', ''))
  939. def depart_inline(self, node):
  940. self.body.append('</span>')
  941. # footnote and citation labels:
  942. def visit_label(self, node):
  943. self.body.append('<span class="label">')
  944. self.body.append('<span class="fn-bracket">[</span>')
  945. # footnote/citation backrefs:
  946. if self.settings.footnote_backlinks:
  947. backrefs = node.parent.get('backrefs', [])
  948. if len(backrefs) == 1:
  949. self.body.append('<a role="doc-backlink"'
  950. ' href="#%s">' % backrefs[0])
  951. def depart_label(self, node):
  952. backrefs = []
  953. if self.settings.footnote_backlinks:
  954. backrefs = node.parent.get('backrefs', backrefs)
  955. if len(backrefs) == 1:
  956. self.body.append('</a>')
  957. self.body.append('<span class="fn-bracket">]</span></span>\n')
  958. if len(backrefs) > 1:
  959. backlinks = ['<a role="doc-backlink" href="#%s">%s</a>' % (ref, i)
  960. for (i, ref) in enumerate(backrefs, 1)]
  961. self.body.append('<span class="backrefs">(%s)</span>\n'
  962. % ','.join(backlinks))
  963. def visit_legend(self, node):
  964. self.body.append(self.starttag(node, 'div', CLASS='legend'))
  965. def depart_legend(self, node):
  966. self.body.append('</div>\n')
  967. def visit_line(self, node):
  968. self.body.append(self.starttag(node, 'div', suffix='', CLASS='line'))
  969. if not len(node):
  970. self.body.append('<br />')
  971. def depart_line(self, node):
  972. self.body.append('</div>\n')
  973. def visit_line_block(self, node):
  974. self.body.append(self.starttag(node, 'div', CLASS='line-block'))
  975. def depart_line_block(self, node):
  976. self.body.append('</div>\n')
  977. def visit_list_item(self, node):
  978. self.body.append(self.starttag(node, 'li', ''))
  979. def depart_list_item(self, node):
  980. self.body.append('</li>\n')
  981. # inline literal
  982. def visit_literal(self, node):
  983. # special case: "code" role
  984. classes = node['classes']
  985. if 'code' in classes:
  986. # filter 'code' from class arguments
  987. classes.pop(classes.index('code'))
  988. self.body.append(self.starttag(node, 'code', ''))
  989. return
  990. self.body.append(
  991. self.starttag(node, 'span', '', CLASS='docutils literal'))
  992. text = node.astext()
  993. if not isinstance(node.parent, nodes.literal_block):
  994. text = text.replace('\n', ' ')
  995. # Protect text like ``--an-option`` and the regular expression
  996. # ``[+]?(\d+(\.\d*)?|\.\d+)`` from bad line wrapping
  997. for token in self.words_and_spaces.findall(text):
  998. if token.strip() and self.in_word_wrap_point.search(token):
  999. self.body.append('<span class="pre">%s</span>'
  1000. % self.encode(token))
  1001. else:
  1002. self.body.append(self.encode(token))
  1003. self.body.append('</span>')
  1004. raise nodes.SkipNode # content already processed
  1005. def depart_literal(self, node):
  1006. # skipped unless literal element is from "code" role:
  1007. self.body.append('</code>')
  1008. def visit_literal_block(self, node):
  1009. self.body.append(self.starttag(node, 'pre', '', CLASS='literal-block'))
  1010. if 'code' in node['classes']:
  1011. self.body.append('<code>')
  1012. def depart_literal_block(self, node):
  1013. if 'code' in node['classes']:
  1014. self.body.append('</code>')
  1015. self.body.append('</pre>\n')
  1016. # Mathematics:
  1017. # As there is no native HTML math support, we provide alternatives
  1018. # for the math-output: LaTeX and MathJax simply wrap the content,
  1019. # HTML and MathML also convert the math_code.
  1020. # HTML container
  1021. math_tags = {
  1022. # math_output: (block, inline, class-arguments)
  1023. 'html': ('div', 'span', 'formula'),
  1024. 'latex': ('pre', 'tt', 'math'),
  1025. 'mathml': ('div', '', ''),
  1026. 'mathjax': ('div', 'span', 'math'),
  1027. }
  1028. def visit_math(self, node, math_env=''):
  1029. # If the method is called from visit_math_block(), math_env != ''.
  1030. if self.math_output not in self.math_tags:
  1031. self.document.reporter.error(
  1032. f'math-output format "{self.math_output}" not supported '
  1033. 'falling back to "latex"')
  1034. self.math_output = 'latex'
  1035. tag = self.math_tags[self.math_output][math_env == '']
  1036. clsarg = self.math_tags[self.math_output][2]
  1037. # LaTeX container
  1038. wrappers = {
  1039. # math_mode: (inline, block)
  1040. 'html': ('$%s$', '\\begin{%s}\n%s\n\\end{%s}'),
  1041. 'latex': (None, None),
  1042. 'mathml': ('$%s$', '\\begin{%s}\n%s\n\\end{%s}'),
  1043. 'mathjax': (r'\(%s\)', '\\begin{%s}\n%s\n\\end{%s}'),
  1044. }
  1045. wrapper = wrappers[self.math_output][math_env != '']
  1046. if (self.math_output == 'mathml'
  1047. and (not self.math_output_options
  1048. or self.math_output_options[0] == 'blahtexml')):
  1049. wrapper = None
  1050. # get and wrap content
  1051. math_code = node.astext().translate(unichar2tex.uni2tex_table)
  1052. if wrapper:
  1053. try: # wrapper with three "%s"
  1054. math_code = wrapper % (math_env, math_code, math_env)
  1055. except TypeError: # wrapper with one "%s"
  1056. math_code = wrapper % math_code
  1057. # settings and conversion
  1058. if self.math_output in ('latex', 'mathjax'):
  1059. math_code = self.encode(math_code)
  1060. if self.math_output == 'mathjax' and not self.math_header:
  1061. try:
  1062. self.mathjax_url = self.math_output_options[0]
  1063. except IndexError:
  1064. self.document.reporter.warning('No MathJax URL specified, '
  1065. 'using local fallback '
  1066. '(see config.html)')
  1067. # append configuration, if not already present in the URL:
  1068. # input LaTeX with AMS, output common HTML
  1069. if '?' not in self.mathjax_url:
  1070. self.mathjax_url += '?config=TeX-AMS_CHTML'
  1071. self.math_header = [self.mathjax_script % self.mathjax_url]
  1072. elif self.math_output == 'html':
  1073. if self.math_output_options and not self.math_header:
  1074. self.math_header = [self.stylesheet_call(
  1075. utils.find_file_in_dirs(s, self.settings.stylesheet_dirs),
  1076. adjust_path=True)
  1077. for s in self.math_output_options[0].split(',')]
  1078. # TODO: fix display mode in matrices and fractions
  1079. math2html.DocumentParameters.displaymode = (math_env != '')
  1080. math_code = math2html.math2html(math_code)
  1081. elif self.math_output == 'mathml':
  1082. if 'XHTML 1' in self.doctype:
  1083. self.doctype = self.doctype_mathml
  1084. self.content_type = self.content_type_mathml
  1085. converter = ' '.join(self.math_output_options).lower()
  1086. try:
  1087. if converter == 'latexml':
  1088. math_code = tex2mathml_extern.latexml(
  1089. math_code, self.document.reporter)
  1090. elif converter == 'ttm':
  1091. math_code = tex2mathml_extern.ttm(
  1092. math_code, self.document.reporter)
  1093. elif converter == 'blahtexml':
  1094. math_code = tex2mathml_extern.blahtexml(
  1095. math_code,
  1096. inline=not(math_env),
  1097. reporter=self.document.reporter)
  1098. elif not converter:
  1099. math_code = latex2mathml.tex2mathml(
  1100. math_code, inline=not(math_env))
  1101. else:
  1102. self.document.reporter.error('option "%s" not supported '
  1103. 'with math-output "MathML"')
  1104. except OSError:
  1105. raise OSError('is "latexmlmath" in your PATH?')
  1106. except SyntaxError as err:
  1107. err_node = self.document.reporter.error(err, base_node=node)
  1108. self.visit_system_message(err_node)
  1109. self.body.append(self.starttag(node, 'p'))
  1110. self.body.append(','.join(err.args))
  1111. self.body.append('</p>\n')
  1112. self.body.append(self.starttag(node, 'pre',
  1113. CLASS='literal-block'))
  1114. self.body.append(self.encode(math_code))
  1115. self.body.append('\n</pre>\n')
  1116. self.depart_system_message(err_node)
  1117. raise nodes.SkipNode
  1118. # append to document body
  1119. if tag:
  1120. self.body.append(self.starttag(node, tag,
  1121. suffix='\n'*bool(math_env),
  1122. CLASS=clsarg))
  1123. self.body.append(math_code)
  1124. if math_env: # block mode (equation, display)
  1125. self.body.append('\n')
  1126. if tag:
  1127. self.body.append('</%s>' % tag)
  1128. if math_env:
  1129. self.body.append('\n')
  1130. # Content already processed:
  1131. raise nodes.SkipNode
  1132. def depart_math(self, node):
  1133. pass # never reached
  1134. def visit_math_block(self, node):
  1135. math_env = pick_math_environment(node.astext())
  1136. self.visit_math(node, math_env=math_env)
  1137. def depart_math_block(self, node):
  1138. pass # never reached
  1139. # Meta tags: 'lang' attribute replaced by 'xml:lang' in XHTML 1.1
  1140. # HTML5/polyglot recommends using both
  1141. def visit_meta(self, node):
  1142. meta = self.emptytag(node, 'meta', **node.non_default_attributes())
  1143. self.add_meta(meta)
  1144. def depart_meta(self, node):
  1145. pass
  1146. def add_meta(self, tag):
  1147. self.meta.append(tag)
  1148. self.head.append(tag)
  1149. def visit_option(self, node):
  1150. self.body.append(self.starttag(node, 'span', '', CLASS='option'))
  1151. def depart_option(self, node):
  1152. self.body.append('</span>')
  1153. if isinstance(node.next_node(descend=False, siblings=True),
  1154. nodes.option):
  1155. self.body.append(', ')
  1156. def visit_option_argument(self, node):
  1157. self.body.append(node.get('delimiter', ' '))
  1158. self.body.append(self.starttag(node, 'var', ''))
  1159. def depart_option_argument(self, node):
  1160. self.body.append('</var>')
  1161. def visit_option_group(self, node):
  1162. self.body.append(self.starttag(node, 'dt', ''))
  1163. self.body.append('<kbd>')
  1164. def depart_option_group(self, node):
  1165. self.body.append('</kbd></dt>\n')
  1166. def visit_option_list(self, node):
  1167. self.body.append(
  1168. self.starttag(node, 'dl', CLASS='option-list'))
  1169. def depart_option_list(self, node):
  1170. self.body.append('</dl>\n')
  1171. def visit_option_list_item(self, node):
  1172. pass
  1173. def depart_option_list_item(self, node):
  1174. pass
  1175. def visit_option_string(self, node):
  1176. pass
  1177. def depart_option_string(self, node):
  1178. pass
  1179. def visit_organization(self, node):
  1180. self.visit_docinfo_item(node, 'organization')
  1181. def depart_organization(self, node):
  1182. self.depart_docinfo_item()
  1183. # Do not omit <p> tags
  1184. # --------------------
  1185. #
  1186. # The HTML4CSS1 writer does this to "produce
  1187. # visually compact lists (less vertical whitespace)". This writer
  1188. # relies on CSS rules for visual compactness.
  1189. #
  1190. # * In XHTML 1.1, e.g., a <blockquote> element may not contain
  1191. # character data, so you cannot drop the <p> tags.
  1192. # * Keeping simple paragraphs in the field_body enables a CSS
  1193. # rule to start the field-body on a new line if the label is too long
  1194. # * it makes the code simpler.
  1195. #
  1196. # TODO: omit paragraph tags in simple table cells?
  1197. def visit_paragraph(self, node):
  1198. self.body.append(self.starttag(node, 'p', ''))
  1199. def depart_paragraph(self, node):
  1200. self.body.append('</p>')
  1201. if not (isinstance(node.parent, (nodes.list_item, nodes.entry))
  1202. and (len(node.parent) == 1)):
  1203. self.body.append('\n')
  1204. def visit_problematic(self, node):
  1205. if node.hasattr('refid'):
  1206. self.body.append('<a href="#%s">' % node['refid'])
  1207. self.context.append('</a>')
  1208. else:
  1209. self.context.append('')
  1210. self.body.append(self.starttag(node, 'span', '', CLASS='problematic'))
  1211. def depart_problematic(self, node):
  1212. self.body.append('</span>')
  1213. self.body.append(self.context.pop())
  1214. def visit_raw(self, node):
  1215. if 'html' in node.get('format', '').split():
  1216. if isinstance(node.parent, nodes.TextElement):
  1217. tagname = 'span'
  1218. else:
  1219. tagname = 'div'
  1220. if node['classes']:
  1221. self.body.append(self.starttag(node, tagname, suffix=''))
  1222. self.body.append(node.astext())
  1223. if node['classes']:
  1224. self.body.append('</%s>' % tagname)
  1225. # Keep non-HTML raw text out of output:
  1226. raise nodes.SkipNode
  1227. def visit_reference(self, node):
  1228. atts = {'class': 'reference'}
  1229. if 'refuri' in node:
  1230. atts['href'] = node['refuri']
  1231. if (self.settings.cloak_email_addresses
  1232. and atts['href'].startswith('mailto:')):
  1233. atts['href'] = self.cloak_mailto(atts['href'])
  1234. self.in_mailto = True
  1235. atts['class'] += ' external'
  1236. else:
  1237. assert 'refid' in node, \
  1238. 'References must have "refuri" or "refid" attribute.'
  1239. atts['href'] = '#' + node['refid']
  1240. atts['class'] += ' internal'
  1241. if len(node) == 1 and isinstance(node[0], nodes.image):
  1242. atts['class'] += ' image-reference'
  1243. if not isinstance(node.parent, nodes.TextElement):
  1244. assert len(node) == 1 and isinstance(node[0], nodes.image)
  1245. atts['class'] += ' image-reference'
  1246. self.body.append(self.starttag(node, 'a', '', **atts))
  1247. def depart_reference(self, node):
  1248. self.body.append('</a>')
  1249. if not isinstance(node.parent, nodes.TextElement):
  1250. self.body.append('\n')
  1251. self.in_mailto = False
  1252. def visit_revision(self, node):
  1253. self.visit_docinfo_item(node, 'revision', meta=False)
  1254. def depart_revision(self, node):
  1255. self.depart_docinfo_item()
  1256. def visit_row(self, node):
  1257. self.body.append(self.starttag(node, 'tr', ''))
  1258. node.column = 0
  1259. def depart_row(self, node):
  1260. self.body.append('</tr>\n')
  1261. def visit_rubric(self, node):
  1262. self.body.append(self.starttag(node, 'p', '', CLASS='rubric'))
  1263. def depart_rubric(self, node):
  1264. self.body.append('</p>\n')
  1265. def visit_section(self, node):
  1266. self.section_level += 1
  1267. self.body.append(
  1268. self.starttag(node, 'div', CLASS='section'))
  1269. def depart_section(self, node):
  1270. self.section_level -= 1
  1271. self.body.append('</div>\n')
  1272. # TODO: use the new HTML5 element <aside>
  1273. def visit_sidebar(self, node):
  1274. self.body.append(
  1275. self.starttag(node, 'div', CLASS='sidebar'))
  1276. self.in_sidebar = True
  1277. def depart_sidebar(self, node):
  1278. self.body.append('</div>\n')
  1279. self.in_sidebar = False
  1280. def visit_status(self, node):
  1281. self.visit_docinfo_item(node, 'status', meta=False)
  1282. def depart_status(self, node):
  1283. self.depart_docinfo_item()
  1284. def visit_strong(self, node):
  1285. self.body.append(self.starttag(node, 'strong', ''))
  1286. def depart_strong(self, node):
  1287. self.body.append('</strong>')
  1288. def visit_subscript(self, node):
  1289. self.body.append(self.starttag(node, 'sub', ''))
  1290. def depart_subscript(self, node):
  1291. self.body.append('</sub>')
  1292. def visit_substitution_definition(self, node):
  1293. """Internal only."""
  1294. raise nodes.SkipNode
  1295. def visit_substitution_reference(self, node):
  1296. self.unimplemented_visit(node)
  1297. # h1–h6 elements must not be used to markup subheadings, subtitles,
  1298. # alternative titles and taglines unless intended to be the heading for a
  1299. # new section or subsection.
  1300. # -- http://www.w3.org/TR/html51/sections.html#headings-and-sections
  1301. def visit_subtitle(self, node):
  1302. if isinstance(node.parent, nodes.sidebar):
  1303. classes = ['sidebar-subtitle']
  1304. elif isinstance(node.parent, nodes.document):
  1305. classes = ['subtitle']
  1306. self.in_document_title = len(self.body) + 1
  1307. elif isinstance(node.parent, nodes.section):
  1308. classes = ['section-subtitle']
  1309. self.body.append(self.starttag(node, 'p', '', classes=classes))
  1310. def depart_subtitle(self, node):
  1311. self.body.append('</p>\n')
  1312. if isinstance(node.parent, nodes.document):
  1313. self.subtitle = self.body[self.in_document_title:-1]
  1314. self.in_document_title = 0
  1315. self.body_pre_docinfo.extend(self.body)
  1316. self.html_subtitle.extend(self.body)
  1317. del self.body[:]
  1318. def visit_superscript(self, node):
  1319. self.body.append(self.starttag(node, 'sup', ''))
  1320. def depart_superscript(self, node):
  1321. self.body.append('</sup>')
  def visit_system_message(self, node):
      """Open an ``<aside>`` for a system message and write its title line.

      The title paragraph contains the message type and level, the
      encoded source, the line number (if the node has a "line"
      attribute), and links to the ids listed in ``node['backrefs']``.
      """
      self.body.append(self.starttag(node, 'aside', CLASS='system-message'))
      self.body.append('<p class="system-message-title">')

      backrefs = node['backrefs']
      if not backrefs:
          backref_text = ''
      elif len(backrefs) == 1:
          backref_text = ('; <em><a href="#%s">backlink</a></em>'
                          % backrefs[0])
      else:
          # Several back-references: numbered links, counting from 1.
          backlinks = ['<a href="#%s">%s</a>' % (backref, number)
                       for number, backref in enumerate(backrefs, 1)]
          backref_text = ('; <em>backlinks: %s</em>'
                          % ', '.join(backlinks))

      line = ', line %s' % node['line'] if node.hasattr('line') else ''

      title_values = (node['type'], node['level'],
                      self.encode(node['source']), line, backref_text)
      self.body.append('System Message: %s/%s '
                       '(<span class="docutils literal">%s</span>%s)%s</p>\n'
                       % title_values)
  def depart_system_message(self, node):
      """Append the ``</aside>`` end tag closing a system message."""
      end_tag = '</aside>\n'
      self.body.append(end_tag)
  def visit_table(self, node):
      """Append the start tag of a ``<table>`` element.

      Class values are taken from the `table_style` setting (split at
      commas and whitespace).  An "align" attribute of the node adds an
      ``align-<value>`` class, a "width" attribute adds a ``width``
      style declaration.
      """
      style_setting = self.settings.table_style
      classes = style_setting.replace(',', ' ').split()
      if 'align' in node:
          classes.append('align-%s' % node['align'])
      atts = {'classes': classes}
      if 'width' in node:
          atts['style'] = 'width: %s;' % node['width']
      self.body.append(self.starttag(node, 'table', **atts))
  def depart_table(self, node):
      """Append the ``</table>`` end tag to `self.body`."""
      end_tag = '</table>\n'
      self.body.append(end_tag)
  def visit_target(self, node):
      """Wrap a target without reference attributes in a ``<span>``.

      A target with none of "refuri", "refid", or "refname" gets a
      ``<span class="target">`` start tag; the matching end tag is
      pushed on `self.context`.  For any other target nothing is written
      and an empty string is pushed instead.
      """
      reference_atts = ('refuri', 'refid', 'refname')
      if any(att in node for att in reference_atts):
          self.context.append('')
      else:
          self.body.append(self.starttag(node, 'span', '', CLASS='target'))
          self.context.append('</span>')
  def depart_target(self, node):
      """Append the string last pushed on `self.context` to `self.body`."""
      closing = self.context.pop()
      self.body.append(closing)
  1369. # no hard-coded vertical alignment in table body
  def visit_tbody(self, node):
      """Append the start tag of a ``<tbody>`` element to `self.body`."""
      start_tag = self.starttag(node, 'tbody')
      self.body.append(start_tag)
  def depart_tbody(self, node):
      """Append the ``</tbody>`` end tag to `self.body`."""
      end_tag = '</tbody>\n'
      self.body.append(end_tag)
  def visit_term(self, node):
      """Append the start tag for a definition list term.

      If the grandparent node has the class value "details", a
      ``<summary>`` start tag is written.  Otherwise a ``<dt>`` start
      tag is written that carries the "classes" and "ids" of the parent
      node.
      """
      item = node.parent
      if "details" in item.parent['classes']:
          self.body.append(self.starttag(node, 'summary', ''))
          return
      # The parent node (definition_list_item) is omitted in HTML.
      start_tag = self.starttag(node, 'dt', '',
                                classes=item['classes'],
                                ids=item['ids'])
      self.body.append(start_tag)
  def depart_term(self, node):
      """Do nothing: no end tag is written when leaving a term."""
      # Leave the end tag to `self.visit_definition()`,
      # in case there's a classifier.
  def visit_tgroup(self, node):
      """Start a table group with empty column specs and stub lists.

      Resets `self.colspecs` and sets ``node.stubs`` to new empty lists.
      """
      self.colspecs = list()
      node.stubs = list()
  def depart_tgroup(self, node):
      """Do nothing: leaving a table group writes no output."""
  def visit_thead(self, node):
      """Append the start tag of a ``<thead>`` element to `self.body`."""
      start_tag = self.starttag(node, 'thead')
      self.body.append(start_tag)
  def depart_thead(self, node):
      """Append the ``</thead>`` end tag to `self.body`."""
      end_tag = '</thead>\n'
      self.body.append(end_tag)
  def section_title_tags(self, node):
      """Return the tuple (start tag, end tag) for a section title.

      The heading level is computed from `self.section_level` and
      `self.initial_header_level`.  Levels above 6 use ``<h6>`` with an
      additional "aria-level" attribute.  If the node has a "refid"
      attribute, the heading content is wrapped in an ``<a>`` element
      linking to that id.
      """
      h_level = self.section_level + self.initial_header_level - 1
      # Only 6 heading levels have dedicated HTML tags.
      tagname = 'h%i' % min(h_level, 6)
      heading_atts = {'aria-level': h_level} if h_level > 6 else {}
      start_tag = self.starttag(node, tagname, '', **heading_atts)

      if not node.hasattr('refid'):
          return start_tag, '</%s>\n' % tagname

      link_atts = {
          'class': 'toc-backref',
          'role': 'doc-backlink',  # HTML5 only
          'href': '#' + node['refid'],
      }
      start_tag += self.starttag(nodes.reference(), 'a', '', **link_atts)
      return start_tag, '</a></%s>\n' % tagname
  def visit_title(self, node):
      """Append the start tag for a title; push the end tag on the context.

      The element depends on the parent node: a ``<p>`` with a class for
      topic, sidebar, and admonition titles, ``<caption>`` for table
      titles, ``<h1 class="title">`` for the document title, and the
      tags returned by `section_title_tags()` for section titles.
      For the document title, the index of the item following the start
      tag in `self.body` is stored in `self.in_document_title`.
      """
      parent = node.parent
      close_tag = '</p>\n'
      if isinstance(parent, nodes.topic):
          # TODO: use role="heading" or <h1>? (HTML5 only)
          start_tag = self.starttag(node, 'p', '', CLASS='topic-title')
      elif isinstance(parent, nodes.sidebar):
          # TODO: use role="heading" or <h1>? (HTML5 only)
          start_tag = self.starttag(node, 'p', '', CLASS='sidebar-title')
      elif isinstance(parent, nodes.Admonition):
          start_tag = self.starttag(node, 'p', '', CLASS='admonition-title')
      elif isinstance(parent, nodes.table):
          start_tag = self.starttag(node, 'caption', '')
          close_tag = '</caption>\n'
      elif isinstance(parent, nodes.document):
          start_tag = self.starttag(node, 'h1', '', CLASS='title')
          close_tag = '</h1>\n'
          # The start tag is appended below, so the title content begins
          # one position after the current end of `self.body`.
          self.in_document_title = len(self.body) + 1
      else:
          assert isinstance(parent, nodes.section)
          # Get correct heading and evt. backlink tags
          start_tag, close_tag = self.section_title_tags(node)
      self.body.append(start_tag)
      self.context.append(close_tag)
  def depart_title(self, node):
      """Append the end tag popped from `self.context`.

      If `self.in_document_title` is set, also store the items between
      start and end tag in `self.title`, reset `self.in_document_title`,
      copy all of `self.body` to `self.body_pre_docinfo` and
      `self.html_title`, and empty `self.body`.
      """
      self.body.append(self.context.pop())
      content_start = self.in_document_title
      if not content_start:
          return
      # Everything after the start tag, without the end tag just appended.
      self.title = self.body[content_start:-1]
      self.in_document_title = 0
      collected = self.body
      self.body_pre_docinfo.extend(collected)
      self.html_title.extend(collected)
      del self.body[:]
  def visit_title_reference(self, node):
      """Append the start tag of a ``<cite>`` element to `self.body`."""
      start_tag = self.starttag(node, 'cite', '')
      self.body.append(start_tag)
  def depart_title_reference(self, node):
      """Append the ``</cite>`` end tag to `self.body`."""
      end_tag = '</cite>'
      self.body.append(end_tag)
  def visit_topic(self, node):
      """Append the start tag of a ``<div class="topic">`` element."""
      start_tag = self.starttag(node, 'div', CLASS='topic')
      self.body.append(start_tag)
  def depart_topic(self, node):
      """Append the ``</div>`` end tag closing a topic."""
      end_tag = '</div>\n'
      self.body.append(end_tag)
  def visit_transition(self, node):
      """Append an empty ``<hr>`` tag with class "docutils"."""
      rule = self.emptytag(node, 'hr', CLASS='docutils')
      self.body.append(rule)
  def depart_transition(self, node):
      """Do nothing: leaving a transition writes no output."""
  def visit_version(self, node):
      """Hand the node to `visit_docinfo_item()` as a "version" item.

      The call passes ``meta=False``.
      """
      name = 'version'
      self.visit_docinfo_item(node, name, meta=False)
  def depart_version(self, node):
      """Finish the "version" item by calling `depart_docinfo_item()`."""
      finish_item = self.depart_docinfo_item
      finish_item()
  def unimplemented_visit(self, node):
      """Raise `NotImplementedError` naming the class of `node`."""
      node_type = node.__class__.__name__
      message = 'visiting unimplemented node type: %s' % node_type
      raise NotImplementedError(message)
  class SimpleListChecker(nodes.GenericNodeVisitor):

      """
      Raise `nodes.NodeFound` if a non-simple list item is encountered.

      A list item is "simple" if it contains nothing other than a single
      paragraph, a simple list, or a paragraph followed by a simple list.

      This version also checks for simple field lists and docinfo.
      """

      def default_visit(self, node):
          # Node types without a handler assigned below are not "simple".
          raise nodes.NodeFound

      def visit_list_item(self, node):
          # Invisible children do not count.
          visible = [child for child in node.children
                     if not isinstance(child, nodes.Invisible)]
          list_types = (nodes.bullet_list, nodes.enumerated_list,
                        nodes.field_list)
          if (visible
              and isinstance(visible[0], nodes.paragraph)
              and isinstance(visible[-1], list_types)):
              # A list following the leading paragraph is allowed;
              # drop it before counting the remaining children.
              visible.pop()
          if len(visible) > 1:
              raise nodes.NodeFound

      def pass_node(self, node):
          # Accept the node itself.
          pass

      def ignore_node(self, node):
          # ignore nodes that are never complex (can contain only inline nodes)
          raise nodes.SkipNode

      # Paragraphs and text
      visit_Text = ignore_node
      visit_paragraph = ignore_node

      # Lists
      visit_bullet_list = pass_node
      visit_enumerated_list = pass_node
      visit_docinfo = pass_node

      # Docinfo nodes:
      visit_author = ignore_node
      visit_authors = visit_list_item
      visit_address = visit_list_item
      visit_contact = pass_node
      visit_copyright = ignore_node
      visit_date = ignore_node
      visit_organization = ignore_node
      visit_status = ignore_node
      visit_version = visit_list_item

      # Definition list:
      visit_definition_list = pass_node
      visit_definition_list_item = pass_node
      visit_term = ignore_node
      visit_classifier = pass_node
      visit_definition = visit_list_item

      # Field list:
      visit_field_list = pass_node
      visit_field = pass_node
      # the field body corresponds to a list item
      visit_field_body = visit_list_item
      visit_field_name = ignore_node

      # Invisible nodes should be ignored.
      visit_comment = ignore_node
      visit_substitution_definition = ignore_node
      visit_target = ignore_node
      visit_pending = ignore_node