# $Id: states.py 9037 2022-03-05 23:31:10Z milde $
# Author: David Goodger <goodger@python.org>
# Copyright: This module has been placed in the public domain.

"""
This is the ``docutils.parsers.rst.states`` module, the core of
the reStructuredText parser. It defines the following:

:Classes:
    - `RSTStateMachine`: reStructuredText parser's entry point.
    - `NestedStateMachine`: recursive StateMachine.
    - `RSTState`: reStructuredText State superclass.
    - `Inliner`: For parsing inline markup.
    - `Body`: Generic classifier of the first line of a block.
    - `SpecializedBody`: Superclass for compound element members.
    - `BulletList`: Second and subsequent bullet_list list_items.
    - `DefinitionList`: Second+ definition_list_items.
    - `EnumeratedList`: Second+ enumerated_list list_items.
    - `FieldList`: Second+ fields.
    - `OptionList`: Second+ option_list_items.
    - `RFC2822List`: Second+ RFC2822-style fields.
    - `ExtensionOptions`: Parses directive option fields.
    - `Explicit`: Second+ explicit markup constructs.
    - `SubstitutionDef`: For embedded directives in substitution definitions.
    - `Text`: Classifier of second line of a text block.
    - `SpecializedText`: Superclass for continuation lines of Text-variants.
    - `Definition`: Second line of potential definition_list_item.
    - `Line`: Second line of overlined section title or transition marker.
    - `Struct`: An auxiliary collection class.

:Exception classes:
    - `MarkupError`
    - `ParserError`
    - `MarkupMismatch`

:Functions:
    - `escape2null()`: Return a string, escape-backslashes converted to nulls.
    - `unescape()`: Return a string, nulls removed or restored to backslashes.

:Attributes:
    - `state_classes`: set of State classes used with `RSTStateMachine`.


Parser Overview
===============

The reStructuredText parser is implemented as a recursive state machine,
examining its input one line at a time. To understand how the parser works,
please first become familiar with the `docutils.statemachine` module. In the
description below, references are made to classes defined in this module;
please see the individual classes for details.

Parsing proceeds as follows:

1. The state machine examines each line of input, checking each of the
   transition patterns of the state `Body`, in order, looking for a match.
   The implicit transitions (blank lines and indentation) are checked before
   any others. The 'text' transition is a catch-all (matches anything).

2. The method associated with the matched transition pattern is called.

   A. Some transition methods are self-contained, appending elements to the
      document tree (`Body.doctest` parses a doctest block). The parser's
      current line index is advanced to the end of the element, and parsing
      continues with step 1.

   B. Other transition methods trigger the creation of a nested state
      machine, whose job is to parse a compound construct ('indent' does a
      block quote, 'bullet' does a bullet list, 'overline' does a section
      [first checking for a valid section header], etc.).

      - In the case of lists and explicit markup, a one-off state machine is
        created and run to parse contents of the first item.

      - A new state machine is created and its initial state is set to the
        appropriate specialized state (`BulletList` in the case of the
        'bullet' transition; see `SpecializedBody` for more detail). This
        state machine is run to parse the compound element (or series of
        explicit markup elements), and returns as soon as a non-member
        element is encountered. For example, the `BulletList` state machine
        ends as soon as it encounters an element which is not a list item of
        that bullet list. The optional omission of inter-element blank lines
        is enabled by this nested state machine.

      - The current line index is advanced to the end of the elements parsed,
        and parsing continues with step 1.

   C. The result of the 'text' transition depends on the next line of text.
      The current state is changed to `Text`, under which the second line is
      examined. If the second line is:

      - Indented: The element is a definition list item, and parsing proceeds
        similarly to step 2.B, using the `DefinitionList` state.

      - A line of uniform punctuation characters: The element is a section
        header; again, parsing proceeds as in step 2.B, and `Body` is still
        used.

      - Anything else: The element is a paragraph, which is examined for
        inline markup and appended to the parent element. Processing
        continues with step 1.
"""

__docformat__ = 'reStructuredText'


import re
from types import FunctionType, MethodType

from docutils import nodes, statemachine, utils
from docutils import ApplicationError, DataError
from docutils.statemachine import StateMachineWS, StateWS
from docutils.nodes import fully_normalize_name as normalize_name
from docutils.nodes import unescape, whitespace_normalize_name
import docutils.parsers.rst
from docutils.parsers.rst import directives, languages, tableparser, roles
from docutils.utils import escape2null, column_width
from docutils.utils import punctuation_chars, roman, urischemes
from docutils.utils import split_escaped_whitespace
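

# Illustrative sketch (not part of the original module): the machinery in
# this module is normally driven through `docutils.parsers.rst.Parser`,
# which builds an `RSTStateMachine` with `state_classes` and the initial
# state 'Body' and calls its `run()` method (see the "Parser Overview"
# section of the module docstring).  The helper name `_example_parse_rst`
# and the sample text are invented for the example.
def _example_parse_rst(text='A *small* reStructuredText sample.\n'):
    from docutils.frontend import OptionParser
    from docutils.parsers.rst import Parser
    from docutils.utils import new_document

    parser = Parser()
    settings = OptionParser(components=(Parser,)).get_default_values()
    document = new_document('<example>', settings)
    parser.parse(text, document)   # ends up in RSTStateMachine.run()
    return document                # the populated `nodes.document` tree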


class MarkupError(DataError): pass
class UnknownInterpretedRoleError(DataError): pass
class InterpretedRoleNotImplementedError(DataError): pass
class ParserError(ApplicationError): pass
class MarkupMismatch(Exception): pass


class Struct:
    """Stores data attributes for dotted-attribute access."""

    def __init__(self, **keywordargs):
        self.__dict__.update(keywordargs)


class RSTStateMachine(StateMachineWS):

    """
    reStructuredText's master StateMachine.

    The entry point to reStructuredText parsing is the `run()` method.
    """

    def run(self, input_lines, document, input_offset=0, match_titles=True,
            inliner=None):
        """
        Parse `input_lines` and modify the `document` node in place.

        Extend `StateMachineWS.run()`: set up parse-global data and
        run the StateMachine.
        """
        self.language = languages.get_language(
            document.settings.language_code, document.reporter)
        self.match_titles = match_titles
        if inliner is None:
            inliner = Inliner()
        inliner.init_customizations(document.settings)
        self.memo = Struct(document=document,
                           reporter=document.reporter,
                           language=self.language,
                           title_styles=[],
                           section_level=0,
                           section_bubble_up_kludge=False,
                           inliner=inliner)
        self.document = document
        self.attach_observer(document.note_source)
        self.reporter = self.memo.reporter
        self.node = document
        results = StateMachineWS.run(self, input_lines, input_offset,
                                     input_source=document['source'])
        assert results == [], 'RSTStateMachine.run() results should be empty!'
        self.node = self.memo = None  # remove unneeded references


class NestedStateMachine(StateMachineWS):

    """
    StateMachine run from within other StateMachine runs, to parse nested
    document structures.
    """

    def run(self, input_lines, input_offset, memo, node, match_titles=True):
        """
        Parse `input_lines` and populate a `docutils.nodes.document` instance.

        Extend `StateMachineWS.run()`: set up document-wide data.
        """
        self.match_titles = match_titles
        self.memo = memo
        self.document = memo.document
        self.attach_observer(self.document.note_source)
        self.reporter = memo.reporter
        self.language = memo.language
        self.node = node
        results = StateMachineWS.run(self, input_lines, input_offset)
        assert results == [], ('NestedStateMachine.run() results should be '
                               'empty!')
        return results


class RSTState(StateWS):

    """
    reStructuredText State superclass.

    Contains methods used by all State subclasses.
    """

    nested_sm = NestedStateMachine
    nested_sm_cache = []

    def __init__(self, state_machine, debug=False):
        self.nested_sm_kwargs = {'state_classes': state_classes,
                                 'initial_state': 'Body'}
        StateWS.__init__(self, state_machine, debug)

    def runtime_init(self):
        StateWS.runtime_init(self)
        memo = self.state_machine.memo
        self.memo = memo
        self.reporter = memo.reporter
        self.inliner = memo.inliner
        self.document = memo.document
        self.parent = self.state_machine.node
        # enable the reporter to determine source and source-line
        if not hasattr(self.reporter, 'get_source_and_line'):
            self.reporter.get_source_and_line = self.state_machine.get_source_and_line  # noqa:E501

    def goto_line(self, abs_line_offset):
        """
        Jump to input line `abs_line_offset`, ignoring jumps past the end.
        """
        try:
            self.state_machine.goto_line(abs_line_offset)
        except EOFError:
            pass

    def no_match(self, context, transitions):
        """
        Override `StateWS.no_match` to generate a system message.

        This code should never be run.
        """
        self.reporter.severe(
            'Internal error: no transition pattern match. State: "%s"; '
            'transitions: %s; context: %s; current line: %r.'
            % (self.__class__.__name__, transitions, context,
               self.state_machine.line))
        return context, None, []

    def bof(self, context):
        """Called at beginning of file."""
        return [], []

    def nested_parse(self, block, input_offset, node, match_titles=False,
                     state_machine_class=None, state_machine_kwargs=None):
        """
        Create a new StateMachine rooted at `node` and run it over the input
        `block`.
        """
        use_default = 0
        if state_machine_class is None:
            state_machine_class = self.nested_sm
            use_default += 1
        if state_machine_kwargs is None:
            state_machine_kwargs = self.nested_sm_kwargs
            use_default += 1
        block_length = len(block)

        state_machine = None
        if use_default == 2:
            try:
                state_machine = self.nested_sm_cache.pop()
            except IndexError:
                pass
        if not state_machine:
            state_machine = state_machine_class(debug=self.debug,
                                                **state_machine_kwargs)
        state_machine.run(block, input_offset, memo=self.memo,
                          node=node, match_titles=match_titles)
        if use_default == 2:
            self.nested_sm_cache.append(state_machine)
        else:
            state_machine.unlink()
        new_offset = state_machine.abs_line_offset()
        # No `block.parent` implies disconnected -- lines aren't in sync:
        if block.parent and (len(block) - block_length) != 0:
            # Adjustment for block if modified in nested parse:
            self.state_machine.next_line(len(block) - block_length)
        return new_offset

    def nested_list_parse(self, block, input_offset, node, initial_state,
                          blank_finish,
                          blank_finish_state=None,
                          extra_settings={},
                          match_titles=False,
                          state_machine_class=None,
                          state_machine_kwargs=None):
        """
        Create a new StateMachine rooted at `node` and run it over the input
        `block`. Also keep track of optional intermediate blank lines and the
        required final one.
        """
        if state_machine_class is None:
            state_machine_class = self.nested_sm
        if state_machine_kwargs is None:
            state_machine_kwargs = self.nested_sm_kwargs.copy()
        state_machine_kwargs['initial_state'] = initial_state
        state_machine = state_machine_class(debug=self.debug,
                                            **state_machine_kwargs)
        if blank_finish_state is None:
            blank_finish_state = initial_state
        state_machine.states[blank_finish_state].blank_finish = blank_finish
        for key, value in extra_settings.items():
            setattr(state_machine.states[initial_state], key, value)
        state_machine.run(block, input_offset, memo=self.memo,
                          node=node, match_titles=match_titles)
        blank_finish = state_machine.states[blank_finish_state].blank_finish
        state_machine.unlink()
        return state_machine.abs_line_offset(), blank_finish

    def section(self, title, source, style, lineno, messages):
        """Check for a valid subsection and create one if it checks out."""
        if self.check_subsection(source, style, lineno):
            self.new_subsection(title, lineno, messages)

    def check_subsection(self, source, style, lineno):
        """
        Check for a valid subsection header. Return True or False.

        When a new section is reached that isn't a subsection of the current
        section, back up the line count (use ``previous_line(-x)``), then
        ``raise EOFError``. The current StateMachine will finish, then the
        calling StateMachine can re-examine the title. This will work its way
        back up the calling chain until the correct section level is reached.

        @@@ Alternative: Evaluate the title, store the title info & level, and
        back up the chain until that level is reached. Store in memo? Or
        return in results?

        :Exception: `EOFError` when a sibling or supersection encountered.
        """
        memo = self.memo
        title_styles = memo.title_styles
        mylevel = memo.section_level
        try:  # check for existing title style
            level = title_styles.index(style) + 1
        except ValueError:  # new title style
            if len(title_styles) == memo.section_level:  # new subsection
                title_styles.append(style)
                return True
            else:  # not at lowest level
                self.parent += self.title_inconsistent(source, lineno)
                return False
        if level <= mylevel:  # sibling or supersection
            memo.section_level = level  # bubble up to parent section
            if len(style) == 2:
                memo.section_bubble_up_kludge = True
            # back up 2 lines for underline title, 3 for overline title
            self.state_machine.previous_line(len(style) + 1)
            raise EOFError  # let parent section re-evaluate
        if level == mylevel + 1:  # immediate subsection
            return True
        else:  # invalid subsection
            self.parent += self.title_inconsistent(source, lineno)
            return False

    def title_inconsistent(self, sourcetext, lineno):
        error = self.reporter.severe(
            'Title level inconsistent:', nodes.literal_block('', sourcetext),
            line=lineno)
        return error

    def new_subsection(self, title, lineno, messages):
        """Append new subsection to document tree. On return, check level."""
        memo = self.memo
        mylevel = memo.section_level
        memo.section_level += 1
        section_node = nodes.section()
        self.parent += section_node
        textnodes, title_messages = self.inline_text(title, lineno)
        titlenode = nodes.title(title, '', *textnodes)
        name = normalize_name(titlenode.astext())
        section_node['names'].append(name)
        section_node += titlenode
        section_node += messages
        section_node += title_messages
        self.document.note_implicit_target(section_node, section_node)
        offset = self.state_machine.line_offset + 1
        absoffset = self.state_machine.abs_line_offset() + 1
        newabsoffset = self.nested_parse(
            self.state_machine.input_lines[offset:], input_offset=absoffset,
            node=section_node, match_titles=True)
        self.goto_line(newabsoffset)
        if memo.section_level <= mylevel:  # can't handle next section?
            raise EOFError  # bubble up to supersection
        # reset section_level; next pass will detect it properly
        memo.section_level = mylevel

    def paragraph(self, lines, lineno):
        """
        Return a list (paragraph & messages) & a boolean: literal_block next?
        """
        data = '\n'.join(lines).rstrip()
        if re.search(r'(?<!\\)(\\\\)*::$', data):
            if len(data) == 2:
                return [], 1
            elif data[-3] in ' \n':
                text = data[:-3].rstrip()
            else:
                text = data[:-1]
            literalnext = 1
        else:
            text = data
            literalnext = 0
        textnodes, messages = self.inline_text(text, lineno)
        p = nodes.paragraph(data, '', *textnodes)
        p.source, p.line = self.state_machine.get_source_and_line(lineno)
        return [p] + messages, literalnext

    def inline_text(self, text, lineno):
        """
        Return 2 lists: nodes (text and inline elements), and system_messages.
        """
        nodes, messages = self.inliner.parse(text, lineno,
                                             self.memo, self.parent)
        return nodes, messages

    def unindent_warning(self, node_name):
        # the actual problem is one line below the current line
        lineno = self.state_machine.abs_line_number() + 1
        return self.reporter.warning('%s ends without a blank line; '
                                     'unexpected unindent.' % node_name,
                                     line=lineno)


def build_regexp(definition, compile=True):
    """
    Build, compile and return a regular expression based on `definition`.

    :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
        where "parts" is a list of regular expressions and/or regular
        expression definitions to be joined into an or-group.
    """
    name, prefix, suffix, parts = definition
    part_strings = []
    for part in parts:
        if isinstance(part, tuple):
            part_strings.append(build_regexp(part, None))
        else:
            part_strings.append(part)
    or_group = '|'.join(part_strings)
    regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
    if compile:
        return re.compile(regexp)
    else:
        return regexp
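

# Illustrative sketch (not part of the original module): `build_regexp`
# turns a (group name, prefix, suffix, parts) definition -- possibly with
# nested definitions inside "parts" -- into one named or-group.  The group
# names and patterns below are invented for the example.
def _example_build_regexp():
    pattern = build_regexp(
        ('token', r'\b', r'\b',
         [r'\d+',                          # a plain alternative
          ('word', '', '', [r'[a-z]+']),   # a nested definition -> nested group
          ]))
    assert pattern.pattern == r'\b(?P<token>\d+|(?P<word>[a-z]+))\b'
    return pattern.match('42 apples').group('token')   # -> '42'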


class Inliner:

    """
    Parse inline markup; call the `parse()` method.
    """

    def __init__(self):
        self.implicit_dispatch = []
        """List of (pattern, bound method) tuples, used by
        `self.implicit_inline`."""

    def init_customizations(self, settings):
        # lookahead and look-behind expressions for inline markup rules
        if getattr(settings, 'character_level_inline_markup', False):
            start_string_prefix = '(^|(?<!\x00))'
            end_string_suffix = ''
        else:
            start_string_prefix = ('(^|(?<=\\s|[%s%s]))' %
                                   (punctuation_chars.openers,
                                    punctuation_chars.delimiters))
            end_string_suffix = ('($|(?=\\s|[\x00%s%s%s]))' %
                                 (punctuation_chars.closing_delimiters,
                                  punctuation_chars.delimiters,
                                  punctuation_chars.closers))
        args = locals().copy()
        args.update(vars(self.__class__))

        parts = ('initial_inline', start_string_prefix, '',
                 [
                  ('start', '', self.non_whitespace_after,  # simple start-strings
                   [r'\*\*',        # strong
                    r'\*(?!\*)',    # emphasis but not strong
                    r'``',          # literal
                    r'_`',          # inline internal target
                    r'\|(?!\|)']    # substitution reference
                   ),
                  ('whole', '', end_string_suffix,  # whole constructs
                   [  # reference name & end-string
                    r'(?P<refname>%s)(?P<refend>__?)' % self.simplename,
                    ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
                     [r'[0-9]+',                     # manually numbered
                      r'\#(%s)?' % self.simplename,  # auto-numbered (w/ label?)
                      r'\*',                         # auto-symbol
                      r'(?P<citationlabel>%s)' % self.simplename,  # citation ref
                      ]
                     )
                    ]
                   ),
                  ('backquote',  # interpreted text or phrase reference
                   '(?P<role>(:%s:)?)' % self.simplename,  # optional role
                   self.non_whitespace_after,
                   ['`(?!`)']    # but not literal
                   )
                  ]
                 )
        self.start_string_prefix = start_string_prefix
        self.end_string_suffix = end_string_suffix
        self.parts = parts

        self.patterns = Struct(
          initial=build_regexp(parts),
          emphasis=re.compile(self.non_whitespace_escape_before
                              + r'(\*)' + end_string_suffix),
          strong=re.compile(self.non_whitespace_escape_before
                            + r'(\*\*)' + end_string_suffix),
          interpreted_or_phrase_ref=re.compile(
              r"""
              %(non_unescaped_whitespace_escape_before)s
              (
                `
                (?P<suffix>
                  (?P<role>:%(simplename)s:)?
                  (?P<refend>__?)?
                )
              )
              %(end_string_suffix)s
              """ % args, re.VERBOSE),
          embedded_link=re.compile(
              r"""
              (
                (?:[ \n]+|^)            # spaces or beginning of line/string
                <                       # open bracket
                %(non_whitespace_after)s
                (([^<>]|\x00[<>])+)     # anything but unescaped angle brackets
                %(non_whitespace_escape_before)s
                >                       # close bracket
              )
              $                         # end of string
              """ % args, re.VERBOSE),
          literal=re.compile(self.non_whitespace_before + '(``)'
                             + end_string_suffix),
          target=re.compile(self.non_whitespace_escape_before
                            + r'(`)' + end_string_suffix),
          substitution_ref=re.compile(self.non_whitespace_escape_before
                                      + r'(\|_{0,2})'
                                      + end_string_suffix),
          email=re.compile(self.email_pattern % args + '$',
                           re.VERBOSE),
          uri=re.compile(
              (r"""
              %(start_string_prefix)s
              (?P<whole>
                (?P<absolute>           # absolute URI
                  (?P<scheme>           # scheme (http, ftp, mailto)
                    [a-zA-Z][a-zA-Z0-9.+-]*
                  )
                  :
                  (
                    (                   # either:
                      (//?)?            # hierarchical URI
                      %(uric)s*         # URI characters
                      %(uri_end)s       # final URI char
                    )
                    (                   # optional query
                      \?%(uric)s*
                      %(uri_end)s
                    )?
                    (                   # optional fragment
                      \#%(uric)s*
                      %(uri_end)s
                    )?
                  )
                )
              |                         # *OR*
                (?P<email>              # email address
                """ + self.email_pattern + r"""
                )
              )
              %(end_string_suffix)s
              """) % args, re.VERBOSE),
          pep=re.compile(
              r"""
              %(start_string_prefix)s
              (
                (pep-(?P<pepnum1>\d+)(.txt)?)   # reference to source file
              |
                (PEP\s+(?P<pepnum2>\d+))        # reference by name
              )
              %(end_string_suffix)s""" % args, re.VERBOSE),
          rfc=re.compile(
              r"""
              %(start_string_prefix)s
              (RFC(-|\s+)?(?P<rfcnum>\d+))
              %(end_string_suffix)s""" % args, re.VERBOSE))

        self.implicit_dispatch.append((self.patterns.uri,
                                       self.standalone_uri))
        if settings.pep_references:
            self.implicit_dispatch.append((self.patterns.pep,
                                           self.pep_reference))
        if settings.rfc_references:
            self.implicit_dispatch.append((self.patterns.rfc,
                                           self.rfc_reference))

    def parse(self, text, lineno, memo, parent):
        # Needs to be refactored for nested inline markup.
        # Add nested_parse() method?
        """
        Return 2 lists: nodes (text and inline elements), and system_messages.

        Using `self.patterns.initial`, a pattern which matches start-strings
        (emphasis, strong, interpreted, phrase reference, literal,
        substitution reference, and inline target) and complete constructs
        (simple reference, footnote reference), search for a candidate. When
        one is found, check for validity (e.g., not a quoted '*' character).
        If valid, search for the corresponding end string if applicable, and
        check it for validity. If not found or invalid, generate a warning
        and ignore the start-string. Implicit inline markup (e.g. standalone
        URIs) is found last.

        :text: source string
        :lineno: absolute line number (cf. statemachine.get_source_and_line())
        """
        self.reporter = memo.reporter
        self.document = memo.document
        self.language = memo.language
        self.parent = parent
        pattern_search = self.patterns.initial.search
        dispatch = self.dispatch
        remaining = escape2null(text)
        processed = []
        unprocessed = []
        messages = []
        while remaining:
            match = pattern_search(remaining)
            if match:
                groups = match.groupdict()
                method = dispatch[groups['start'] or groups['backquote']
                                  or groups['refend'] or groups['fnend']]
                before, inlines, remaining, sysmessages = method(self, match,
                                                                 lineno)
                unprocessed.append(before)
                messages += sysmessages
                if inlines:
                    processed += self.implicit_inline(''.join(unprocessed),
                                                      lineno)
                    processed += inlines
                    unprocessed = []
            else:
                break
        remaining = ''.join(unprocessed) + remaining
        if remaining:
            processed += self.implicit_inline(remaining, lineno)
        return processed, messages

    # Inline object recognition
    # -------------------------
    # See also init_customizations().
    non_whitespace_before = r'(?<!\s)'
    non_whitespace_escape_before = r'(?<![\s\x00])'
    non_unescaped_whitespace_escape_before = r'(?<!(?<!\x00)[\s\x00])'
    non_whitespace_after = r'(?!\s)'
    # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
    simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
    # Valid URI characters (see RFC 2396 & RFC 2732);
    # final \x00 allows backslash escapes in URIs:
    uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
    # Delimiter indicating the end of a URI (not part of the URI):
    uri_end_delim = r"""[>]"""
    # Last URI character; same as uric but no punctuation:
    urilast = r"""[_~*/=+a-zA-Z0-9]"""
    # End of a URI (either 'urilast' or 'uric followed by a
    # uri_end_delim'):
    uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
    emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
    email_pattern = r"""
          %(emailc)s+(?:\.%(emailc)s+)*   # name
          (?<!\x00)@                      # at
          %(emailc)s+(?:\.%(emailc)s*)*   # host
          %(uri_end)s                     # final URI char
          """

    def quoted_start(self, match):
        """Test if inline markup start-string is 'quoted'.

        'Quoted' in this context means the start-string is enclosed in a pair
        of matching opening/closing delimiters (not necessarily quotes)
        or at the end of the match.
        """
        string = match.string
        start = match.start()
        if start == 0:  # start-string at beginning of text
            return False
        prestart = string[start - 1]
        try:
            poststart = string[match.end()]
        except IndexError:  # start-string at end of text
            return True  # not "quoted" but no markup start-string either
        return punctuation_chars.match_chars(prestart, poststart)

    def inline_obj(self, match, lineno, end_pattern, nodeclass,
                   restore_backslashes=False):
        string = match.string
        matchstart = match.start('start')
        matchend = match.end('start')
        if self.quoted_start(match):
            return string[:matchend], [], string[matchend:], [], ''
        endmatch = end_pattern.search(string[matchend:])
        if endmatch and endmatch.start(1):  # 1 or more chars
            text = endmatch.string[:endmatch.start(1)]
            if restore_backslashes:
                text = unescape(text, True)
            textend = matchend + endmatch.end(1)
            rawsource = unescape(string[matchstart:textend], True)
            node = nodeclass(rawsource, text)
            return (string[:matchstart], [node],
                    string[textend:], [], endmatch.group(1))
        msg = self.reporter.warning(
            'Inline %s start-string without end-string.'
            % nodeclass.__name__, line=lineno)
        text = unescape(string[matchstart:matchend], True)
        prb = self.problematic(text, text, msg)
        return string[:matchstart], [prb], string[matchend:], [msg], ''

    def problematic(self, text, rawsource, message):
        msgid = self.document.set_id(message, self.parent)
        problematic = nodes.problematic(rawsource, text, refid=msgid)
        prbid = self.document.set_id(problematic)
        message.add_backref(prbid)
        return problematic

    def emphasis(self, match, lineno):
        before, inlines, remaining, sysmessages, endstring = self.inline_obj(
            match, lineno, self.patterns.emphasis, nodes.emphasis)
        return before, inlines, remaining, sysmessages

    def strong(self, match, lineno):
        before, inlines, remaining, sysmessages, endstring = self.inline_obj(
            match, lineno, self.patterns.strong, nodes.strong)
        return before, inlines, remaining, sysmessages

    def interpreted_or_phrase_ref(self, match, lineno):
        end_pattern = self.patterns.interpreted_or_phrase_ref
        string = match.string
        matchstart = match.start('backquote')
        matchend = match.end('backquote')
        rolestart = match.start('role')
        role = match.group('role')
        position = ''
        if role:
            role = role[1:-1]
            position = 'prefix'
        elif self.quoted_start(match):
            return string[:matchend], [], string[matchend:], []
        endmatch = end_pattern.search(string[matchend:])
        if endmatch and endmatch.start(1):  # 1 or more chars
            textend = matchend + endmatch.end()
            if endmatch.group('role'):
                if role:
                    msg = self.reporter.warning(
                        'Multiple roles in interpreted text (both '
                        'prefix and suffix present; only one allowed).',
                        line=lineno)
                    text = unescape(string[rolestart:textend], True)
                    prb = self.problematic(text, text, msg)
                    return string[:rolestart], [prb], string[textend:], [msg]
                role = endmatch.group('suffix')[1:-1]
                position = 'suffix'
            escaped = endmatch.string[:endmatch.start(1)]
            rawsource = unescape(string[matchstart:textend], True)
            if rawsource[-1:] == '_':
                if role:
                    msg = self.reporter.warning(
                        'Mismatch: both interpreted text role %s and '
                        'reference suffix.' % position, line=lineno)
                    text = unescape(string[rolestart:textend], True)
                    prb = self.problematic(text, text, msg)
                    return string[:rolestart], [prb], string[textend:], [msg]
                return self.phrase_ref(string[:matchstart], string[textend:],
                                       rawsource, escaped)
            else:
                rawsource = unescape(string[rolestart:textend], True)
                nodelist, messages = self.interpreted(rawsource, escaped, role,
                                                      lineno)
                return (string[:rolestart], nodelist,
                        string[textend:], messages)
        msg = self.reporter.warning(
            'Inline interpreted text or phrase reference start-string '
            'without end-string.', line=lineno)
        text = unescape(string[matchstart:matchend], True)
        prb = self.problematic(text, text, msg)
        return string[:matchstart], [prb], string[matchend:], [msg]

    def phrase_ref(self, before, after, rawsource, escaped, text=None):
        # `text` is ignored (since 0.16)
        match = self.patterns.embedded_link.search(escaped)
        if match:  # embedded <URI> or <alias_>
            text = escaped[:match.start(0)]
            unescaped = unescape(text)
            rawtext = unescape(text, True)
            aliastext = match.group(2)
            rawaliastext = unescape(aliastext, True)
            underscore_escaped = rawaliastext.endswith(r'\_')
            if (aliastext.endswith('_')
                and not (underscore_escaped
                         or self.patterns.uri.match(aliastext))):
                aliastype = 'name'
                alias = normalize_name(unescape(aliastext[:-1]))
                target = nodes.target(match.group(1), refname=alias)
                target.indirect_reference_name = whitespace_normalize_name(
                    unescape(aliastext[:-1]))
            else:
                aliastype = 'uri'
                # remove unescaped whitespace
                alias_parts = split_escaped_whitespace(match.group(2))
                alias = ' '.join(''.join(part.split())
                                 for part in alias_parts)
                alias = self.adjust_uri(unescape(alias))
                if alias.endswith(r'\_'):
                    alias = alias[:-2] + '_'
                target = nodes.target(match.group(1), refuri=alias)
                target.referenced = 1
            if not aliastext:
                raise ApplicationError('problem with embedded link: %r'
                                       % aliastext)
            if not text:
                text = alias
                unescaped = unescape(text)
                rawtext = rawaliastext
        else:
            text = escaped
            unescaped = unescape(text)
            target = None
            rawtext = unescape(escaped, True)

        refname = normalize_name(unescaped)
        reference = nodes.reference(rawsource, text,
                                    name=whitespace_normalize_name(unescaped))
        reference[0].rawsource = rawtext

        node_list = [reference]

        if rawsource[-2:] == '__':
            if target and (aliastype == 'name'):
                reference['refname'] = alias
                self.document.note_refname(reference)
                # self.document.note_indirect_target(target) # required?
            elif target and (aliastype == 'uri'):
                reference['refuri'] = alias
            else:
                reference['anonymous'] = 1
        else:
            if target:
                target['names'].append(refname)
                if aliastype == 'name':
                    reference['refname'] = alias
                    self.document.note_indirect_target(target)
                    self.document.note_refname(reference)
                else:
                    reference['refuri'] = alias
                    self.document.note_explicit_target(target, self.parent)
                # target.note_referenced_by(name=refname)
                node_list.append(target)
            else:
                reference['refname'] = refname
                self.document.note_refname(reference)
        return before, node_list, after, []

    def adjust_uri(self, uri):
        match = self.patterns.email.match(uri)
        if match:
            return 'mailto:' + uri
        else:
            return uri

    def interpreted(self, rawsource, text, role, lineno):
        role_fn, messages = roles.role(role, self.language, lineno,
                                       self.reporter)
        if role_fn:
            nodes, messages2 = role_fn(role, rawsource, text, lineno, self)
            return nodes, messages + messages2
        else:
            msg = self.reporter.error(
                'Unknown interpreted text role "%s".' % role,
                line=lineno)
            return ([self.problematic(rawsource, rawsource, msg)],
                    messages + [msg])

    def literal(self, match, lineno):
        before, inlines, remaining, sysmessages, endstring = self.inline_obj(
            match, lineno, self.patterns.literal, nodes.literal,
            restore_backslashes=True)
        return before, inlines, remaining, sysmessages

    def inline_internal_target(self, match, lineno):
        before, inlines, remaining, sysmessages, endstring = self.inline_obj(
            match, lineno, self.patterns.target, nodes.target)
        if inlines and isinstance(inlines[0], nodes.target):
            assert len(inlines) == 1
            target = inlines[0]
            name = normalize_name(target.astext())
            target['names'].append(name)
            self.document.note_explicit_target(target, self.parent)
        return before, inlines, remaining, sysmessages

    def substitution_reference(self, match, lineno):
        before, inlines, remaining, sysmessages, endstring = self.inline_obj(
            match, lineno, self.patterns.substitution_ref,
            nodes.substitution_reference)
        if len(inlines) == 1:
            subref_node = inlines[0]
            if isinstance(subref_node, nodes.substitution_reference):
                subref_text = subref_node.astext()
                self.document.note_substitution_ref(subref_node, subref_text)
                if endstring[-1:] == '_':
                    reference_node = nodes.reference(
                        '|%s%s' % (subref_text, endstring), '')
                    if endstring[-2:] == '__':
                        reference_node['anonymous'] = 1
                    else:
                        reference_node['refname'] = normalize_name(subref_text)
                        self.document.note_refname(reference_node)
                    reference_node += subref_node
                    inlines = [reference_node]
        return before, inlines, remaining, sysmessages

    def footnote_reference(self, match, lineno):
        """
        Handles `nodes.footnote_reference` and `nodes.citation_reference`
        elements.
        """
        label = match.group('footnotelabel')
        refname = normalize_name(label)
        string = match.string
        before = string[:match.start('whole')]
        remaining = string[match.end('whole'):]
        if match.group('citationlabel'):
            refnode = nodes.citation_reference('[%s]_' % label,
                                               refname=refname)
            refnode += nodes.Text(label)
            self.document.note_citation_ref(refnode)
        else:
            refnode = nodes.footnote_reference('[%s]_' % label)
            if refname[0] == '#':
                refname = refname[1:]
                refnode['auto'] = 1
                self.document.note_autofootnote_ref(refnode)
            elif refname == '*':
                refname = ''
                refnode['auto'] = '*'
                self.document.note_symbol_footnote_ref(
                    refnode)
            else:
                refnode += nodes.Text(label)
            if refname:
                refnode['refname'] = refname
                self.document.note_footnote_ref(refnode)
        if utils.get_trim_footnote_ref_space(self.document.settings):
            before = before.rstrip()
        return before, [refnode], remaining, []

    def reference(self, match, lineno, anonymous=False):
        referencename = match.group('refname')
        refname = normalize_name(referencename)
        referencenode = nodes.reference(
            referencename + match.group('refend'), referencename,
            name=whitespace_normalize_name(referencename))
        referencenode[0].rawsource = referencename
        if anonymous:
            referencenode['anonymous'] = 1
        else:
            referencenode['refname'] = refname
            self.document.note_refname(referencenode)
        string = match.string
        matchstart = match.start('whole')
        matchend = match.end('whole')
        return string[:matchstart], [referencenode], string[matchend:], []

    def anonymous_reference(self, match, lineno):
        return self.reference(match, lineno, anonymous=True)

    def standalone_uri(self, match, lineno):
        if (not match.group('scheme')
            or match.group('scheme').lower() in urischemes.schemes):
            if match.group('email'):
                addscheme = 'mailto:'
            else:
                addscheme = ''
            text = match.group('whole')
            refuri = addscheme + unescape(text)
            reference = nodes.reference(unescape(text, True), text,
                                        refuri=refuri)
            return [reference]
        else:  # not a valid scheme
            raise MarkupMismatch

    def pep_reference(self, match, lineno):
        text = match.group(0)
        if text.startswith('pep-'):
            pepnum = int(unescape(match.group('pepnum1')))
        elif text.startswith('PEP'):
            pepnum = int(unescape(match.group('pepnum2')))
        else:
            raise MarkupMismatch
        ref = (self.document.settings.pep_base_url
               + self.document.settings.pep_file_url_template % pepnum)
        return [nodes.reference(unescape(text, True), text, refuri=ref)]

    rfc_url = 'rfc%d.html'

    def rfc_reference(self, match, lineno):
        text = match.group(0)
        if text.startswith('RFC'):
            rfcnum = int(unescape(match.group('rfcnum')))
            ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
        else:
            raise MarkupMismatch
        return [nodes.reference(unescape(text, True), text, refuri=ref)]

    def implicit_inline(self, text, lineno):
        """
        Check each of the patterns in `self.implicit_dispatch` for a match,
        and dispatch to the stored method for the pattern. Recursively check
        the text before and after the match. Return a list of `nodes.Text`
        and inline element nodes.
        """
        if not text:
            return []
        for pattern, method in self.implicit_dispatch:
            match = pattern.search(text)
            if match:
                try:
                    # Must recurse on strings before *and* after the match;
                    # there may be multiple patterns.
                    return (self.implicit_inline(text[:match.start()], lineno)
                            + method(match, lineno)
                            + self.implicit_inline(text[match.end():], lineno))
                except MarkupMismatch:
                    pass
        return [nodes.Text(text)]

    dispatch = {'*': emphasis,
                '**': strong,
                '`': interpreted_or_phrase_ref,
                '``': literal,
                '_`': inline_internal_target,
                ']_': footnote_reference,
                '|': substitution_reference,
                '_': reference,
                '__': anonymous_reference}
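

# Illustrative sketch (not part of the original module): driving the
# `Inliner` by hand over a single line of text.  The helper name
# `_example_inline` and the sample text are invented; during a normal
# parse, `RSTStateMachine.run()` builds the equivalent `memo` itself.
def _example_inline(line='Some *emphasis* and ``literal`` text.'):
    from docutils.frontend import OptionParser
    from docutils.parsers.rst import Parser
    from docutils.utils import new_document

    settings = OptionParser(components=(Parser,)).get_default_values()
    document = new_document('<inline example>', settings)
    memo = Struct(document=document,
                  reporter=document.reporter,
                  language=languages.get_language(settings.language_code,
                                                  document.reporter))
    inliner = Inliner()
    inliner.init_customizations(settings)
    # Returns (list of Text and inline nodes, list of system_message nodes):
    return inliner.parse(line, lineno=1, memo=memo, parent=document)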


def _loweralpha_to_int(s, _zero=(ord('a')-1)):
    return ord(s) - _zero


def _upperalpha_to_int(s, _zero=(ord('A')-1)):
    return ord(s) - _zero


def _lowerroman_to_int(s):
    return roman.fromRoman(s.upper())
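

# Illustrative sketch (not part of the original module): the helpers above
# turn enumerator text into ordinal integers, as used by `Body.enum.converters`
# below.  A quick self-check; the helper name is invented for the example.
def _example_enumerator_conversion():
    assert _loweralpha_to_int('c') == 3
    assert _upperalpha_to_int('C') == 3
    assert _lowerroman_to_int('xiv') == 14   # via docutils.utils.roman
    assert roman.fromRoman('XIV') == 14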


class Body(RSTState):

    """
    Generic classifier of the first line of a block.
    """

    double_width_pad_char = tableparser.TableParser.double_width_pad_char
    """Padding character for East Asian double-width text."""

    enum = Struct()
    """Enumerated list parsing information."""

    enum.formatinfo = {
        'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
        'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
        'period': Struct(prefix='', suffix='.', start=0, end=-1)}
    enum.formats = enum.formatinfo.keys()
    enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
                      'lowerroman', 'upperroman']  # ORDERED!
    enum.sequencepats = {'arabic': '[0-9]+',
                         'loweralpha': '[a-z]',
                         'upperalpha': '[A-Z]',
                         'lowerroman': '[ivxlcdm]+',
                         'upperroman': '[IVXLCDM]+'}
    enum.converters = {'arabic': int,
                       'loweralpha': _loweralpha_to_int,
                       'upperalpha': _upperalpha_to_int,
                       'lowerroman': _lowerroman_to_int,
                       'upperroman': roman.fromRoman}

    enum.sequenceregexps = {}
    for sequence in enum.sequences:
        enum.sequenceregexps[sequence] = re.compile(
            enum.sequencepats[sequence] + '$')

    grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
    """Matches the top (& bottom) of a full table."""

    simple_table_top_pat = re.compile('=+( +=+)+ *$')
    """Matches the top of a simple table."""

    simple_table_border_pat = re.compile('=+[ =]*$')
    """Matches the bottom & header bottom of a simple table."""

    pats = {}
    """Fragments of patterns used by transitions."""

    pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
    pats['alpha'] = '[a-zA-Z]'
    pats['alphanum'] = '[a-zA-Z0-9]'
    pats['alphanumplus'] = '[a-zA-Z0-9_-]'
    pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
                    '|%(upperroman)s|#)' % enum.sequencepats)
    pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
    # @@@ Loosen up the pattern? Allow Unicode?
    pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
    pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
    pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
    pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats

    for format in enum.formats:
        pats[format] = '(?P<%s>%s%s%s)' % (
            format, re.escape(enum.formatinfo[format].prefix),
            pats['enum'], re.escape(enum.formatinfo[format].suffix))

    patterns = {
        'bullet': '[-+*\u2022\u2023\u2043]( +|$)',
        'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
        'field_marker': r':(?![: ])([^:\\]|\\.|:(?!([ `]|$)))*(?<! ):( +|$)',
        'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
        'doctest': r'>>>( +|$)',
        'line_block': r'\|( +|$)',
        'grid_table_top': grid_table_top_pat,
        'simple_table_top': simple_table_top_pat,
        'explicit_markup': r'\.\.( +|$)',
        'anonymous': r'__( +|$)',
        'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
        'text': r''}
    initial_transitions = (
        'bullet',
        'enumerator',
        'field_marker',
        'option_marker',
        'doctest',
        'line_block',
        'grid_table_top',
        'simple_table_top',
        'explicit_markup',
        'anonymous',
        'line',
        'text')

    def indent(self, match, context, next_state):
        """Block quote."""
        (indented, indent, line_offset, blank_finish
         ) = self.state_machine.get_indented()
        elements = self.block_quote(indented, line_offset)
        self.parent += elements
        if not blank_finish:
            self.parent += self.unindent_warning('Block quote')
        return context, next_state, []

    def block_quote(self, indented, line_offset):
        elements = []
        while indented:
            blockquote = nodes.block_quote(rawsource='\n'.join(indented))
            (blockquote.source, blockquote.line
             ) = self.state_machine.get_source_and_line(line_offset+1)
            (blockquote_lines,
             attribution_lines,
             attribution_offset,
             indented,
             new_line_offset) = self.split_attribution(indented, line_offset)
            self.nested_parse(blockquote_lines, line_offset, blockquote)
            elements.append(blockquote)
            if attribution_lines:
                attribution, messages = self.parse_attribution(
                    attribution_lines, line_offset+attribution_offset)
                blockquote += attribution
                elements += messages
            line_offset = new_line_offset
            while indented and not indented[0]:
                indented = indented[1:]
                line_offset += 1
        return elements
  1073. # U+2014 is an em-dash:
  1074. attribution_pattern = re.compile('(---?(?!-)|\u2014) *(?=[^ \\n])')
  1075. def split_attribution(self, indented, line_offset):
  1076. """
  1077. Check for a block quote attribution and split it off:
  1078. * First line after a blank line must begin with a dash ("--", "---",
  1079. em-dash; matches `self.attribution_pattern`).
  1080. * Every line after that must have consistent indentation.
  1081. * Attributions must be preceded by block quote content.
  1082. Return a tuple of: (block quote content lines, attribution lines,
  1083. attribution offset, remaining indented lines, remaining lines offset).
  1084. """
  1085. blank = None
  1086. nonblank_seen = False
  1087. for i in range(len(indented)):
  1088. line = indented[i].rstrip()
  1089. if line:
  1090. if nonblank_seen and blank == i - 1: # last line blank
  1091. match = self.attribution_pattern.match(line)
  1092. if match:
  1093. attribution_end, indent = self.check_attribution(
  1094. indented, i)
  1095. if attribution_end:
  1096. a_lines = indented[i:attribution_end]
  1097. a_lines.trim_left(match.end(), end=1)
  1098. a_lines.trim_left(indent, start=1)
  1099. return (indented[:i], a_lines,
  1100. i, indented[attribution_end:],
  1101. line_offset + attribution_end)
  1102. nonblank_seen = True
  1103. else:
  1104. blank = i
  1105. else:
  1106. return indented, None, None, None, None
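# Illustrative sketch (hypothetical helper, not part of the module): lines
# that `attribution_pattern` (defined above) does and does not accept as the
# start of a block quote attribution.
def _example_attribution_marker():
    import re
    pattern = re.compile('(---?(?!-)|\u2014) *(?=[^ \\n])')
    assert pattern.match('-- A. Author')       # two dashes
    assert pattern.match('--- A. Author')      # three dashes
    assert pattern.match('\u2014 A. Author')   # em-dash
    assert not pattern.match('---- too many dashes')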
  1107. def check_attribution(self, indented, attribution_start):
  1108. """
  1109. Check attribution shape.
  1110. Return the index past the end of the attribution, and the indent.
  1111. """
  1112. indent = None
  1113. i = attribution_start + 1
  1114. for i in range(attribution_start + 1, len(indented)):
  1115. line = indented[i].rstrip()
  1116. if not line:
  1117. break
  1118. if indent is None:
  1119. indent = len(line) - len(line.lstrip())
  1120. elif len(line) - len(line.lstrip()) != indent:
  1121. return None, None # bad shape; not an attribution
  1122. else:
  1123. # return index of line after last attribution line:
  1124. i += 1
  1125. return i, (indent or 0)
  1126. def parse_attribution(self, indented, line_offset):
  1127. text = '\n'.join(indented).rstrip()
  1128. lineno = 1 + line_offset # line_offset is zero-based
  1129. textnodes, messages = self.inline_text(text, lineno)
  1130. node = nodes.attribution(text, '', *textnodes)
  1131. node.source, node.line = self.state_machine.get_source_and_line(lineno)
  1132. return node, messages
  1133. def bullet(self, match, context, next_state):
  1134. """Bullet list item."""
  1135. bulletlist = nodes.bullet_list()
  1136. (bulletlist.source,
  1137. bulletlist.line) = self.state_machine.get_source_and_line()
  1138. self.parent += bulletlist
  1139. bulletlist['bullet'] = match.string[0]
  1140. i, blank_finish = self.list_item(match.end())
  1141. bulletlist += i
  1142. offset = self.state_machine.line_offset + 1 # next line
  1143. new_line_offset, blank_finish = self.nested_list_parse(
  1144. self.state_machine.input_lines[offset:],
  1145. input_offset=self.state_machine.abs_line_offset() + 1,
  1146. node=bulletlist, initial_state='BulletList',
  1147. blank_finish=blank_finish)
  1148. self.goto_line(new_line_offset)
  1149. if not blank_finish:
  1150. self.parent += self.unindent_warning('Bullet list')
  1151. return [], next_state, []
  1152. def list_item(self, indent):
  1153. if self.state_machine.line[indent:]:
  1154. indented, line_offset, blank_finish = (
  1155. self.state_machine.get_known_indented(indent))
  1156. else:
  1157. indented, indent, line_offset, blank_finish = (
  1158. self.state_machine.get_first_known_indented(indent))
  1159. listitem = nodes.list_item('\n'.join(indented))
  1160. if indented:
  1161. self.nested_parse(indented, input_offset=line_offset,
  1162. node=listitem)
  1163. return listitem, blank_finish
  1164. def enumerator(self, match, context, next_state):
  1165. """Enumerated List Item"""
  1166. format, sequence, text, ordinal = self.parse_enumerator(match)
  1167. if not self.is_enumerated_list_item(ordinal, sequence, format):
  1168. raise statemachine.TransitionCorrection('text')
  1169. enumlist = nodes.enumerated_list()
  1170. self.parent += enumlist
  1171. if sequence == '#':
  1172. enumlist['enumtype'] = 'arabic'
  1173. else:
  1174. enumlist['enumtype'] = sequence
  1175. enumlist['prefix'] = self.enum.formatinfo[format].prefix
  1176. enumlist['suffix'] = self.enum.formatinfo[format].suffix
  1177. if ordinal != 1:
  1178. enumlist['start'] = ordinal
  1179. msg = self.reporter.info(
  1180. 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
  1181. % (text, ordinal))
  1182. self.parent += msg
  1183. listitem, blank_finish = self.list_item(match.end())
  1184. enumlist += listitem
  1185. offset = self.state_machine.line_offset + 1 # next line
  1186. newline_offset, blank_finish = self.nested_list_parse(
  1187. self.state_machine.input_lines[offset:],
  1188. input_offset=self.state_machine.abs_line_offset() + 1,
  1189. node=enumlist, initial_state='EnumeratedList',
  1190. blank_finish=blank_finish,
  1191. extra_settings={'lastordinal': ordinal,
  1192. 'format': format,
  1193. 'auto': sequence == '#'})
  1194. self.goto_line(newline_offset)
  1195. if not blank_finish:
  1196. self.parent += self.unindent_warning('Enumerated list')
  1197. return [], next_state, []
  1198. def parse_enumerator(self, match, expected_sequence=None):
  1199. """
  1200. Analyze an enumerator and return the results.
  1201. :Return:
  1202. - the enumerator format ('period', 'parens', or 'rparen'),
  1203. - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
  1204. - the text of the enumerator, stripped of formatting, and
  1205. - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
  1206. ``None`` is returned for invalid enumerator text).
  1207. The enumerator format has already been determined by the regular
  1208. expression match. If `expected_sequence` is given, that sequence is
  1209. tried first. If not, we check for Roman numeral 1. This way,
  1210. single-character Roman numerals (which are also alphabetical) can be
  1211. matched. If no sequence has been matched, all sequences are checked in
  1212. order.
  1213. """
  1214. groupdict = match.groupdict()
  1215. sequence = ''
  1216. for format in self.enum.formats:
  1217. if groupdict[format]: # was this the format matched?
  1218. break # yes; keep `format`
  1219. else: # shouldn't happen
  1220. raise ParserError('enumerator format not matched')
  1221. text = groupdict[format][self.enum.formatinfo[format].start # noqa: E203,E501
  1222. : self.enum.formatinfo[format].end]
  1223. if text == '#':
  1224. sequence = '#'
  1225. elif expected_sequence:
  1226. try:
  1227. if self.enum.sequenceregexps[expected_sequence].match(text):
  1228. sequence = expected_sequence
  1229. except KeyError: # shouldn't happen
  1230. raise ParserError('unknown enumerator sequence: %s'
  1231. % sequence)
  1232. elif text == 'i':
  1233. sequence = 'lowerroman'
  1234. elif text == 'I':
  1235. sequence = 'upperroman'
  1236. if not sequence:
  1237. for sequence in self.enum.sequences:
  1238. if self.enum.sequenceregexps[sequence].match(text):
  1239. break
  1240. else: # shouldn't happen
  1241. raise ParserError('enumerator sequence not matched')
  1242. if sequence == '#':
  1243. ordinal = 1
  1244. else:
  1245. try:
  1246. ordinal = self.enum.converters[sequence](text)
  1247. except roman.InvalidRomanNumeralError:
  1248. ordinal = None
  1249. return format, sequence, text, ordinal
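# Illustrative sketch (hypothetical helper, not part of the module): the kind
# of text-to-ordinal conversions `self.enum.converters` is expected to perform
# for each sequence, assuming the same `roman` module used in the code above.
def _example_enumerator_ordinals():
    assert int('3') == 3                   # 'arabic'
    assert ord('c') - ord('a') + 1 == 3    # 'loweralpha'
    assert roman.fromRoman('IV') == 4      # 'upperroman'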
  1250. def is_enumerated_list_item(self, ordinal, sequence, format):
  1251. """
  1252. Check validity based on the ordinal value and the second line.
  1253. Return true if the ordinal is valid and the second line is blank,
  1254. indented, or starts with the next enumerator or an auto-enumerator.
  1255. """
  1256. if ordinal is None:
  1257. return None
  1258. try:
  1259. next_line = self.state_machine.next_line()
  1260. except EOFError: # end of input lines
  1261. self.state_machine.previous_line()
  1262. return 1
  1263. else:
  1264. self.state_machine.previous_line()
  1265. if not next_line[:1].strip(): # blank or indented
  1266. return 1
  1267. result = self.make_enumerator(ordinal + 1, sequence, format)
  1268. if result:
  1269. next_enumerator, auto_enumerator = result
  1270. try:
  1271. if (next_line.startswith(next_enumerator)
  1272. or next_line.startswith(auto_enumerator)):
  1273. return 1
  1274. except TypeError:
  1275. pass
  1276. return None
  1277. def make_enumerator(self, ordinal, sequence, format):
  1278. """
  1279. Construct and return the next enumerated list item marker, and an
  1280. auto-enumerator ("#" instead of the regular enumerator).
  1281. Return ``None`` for invalid (out of range) ordinals.
  1282. """
  1283. if sequence == '#':
  1284. enumerator = '#'
  1285. elif sequence == 'arabic':
  1286. enumerator = str(ordinal)
  1287. else:
  1288. if sequence.endswith('alpha'):
  1289. if ordinal > 26:
  1290. return None
  1291. enumerator = chr(ordinal + ord('a') - 1)
  1292. elif sequence.endswith('roman'):
  1293. try:
  1294. enumerator = roman.toRoman(ordinal)
  1295. except roman.RomanError:
  1296. return None
  1297. else: # shouldn't happen
  1298. raise ParserError('unknown enumerator sequence: "%s"'
  1299. % sequence)
  1300. if sequence.startswith('lower'):
  1301. enumerator = enumerator.lower()
  1302. elif sequence.startswith('upper'):
  1303. enumerator = enumerator.upper()
  1304. else: # shouldn't happen
  1305. raise ParserError('unknown enumerator sequence: "%s"'
  1306. % sequence)
  1307. formatinfo = self.enum.formatinfo[format]
  1308. next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
  1309. + ' ')
  1310. auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
  1311. return next_enumerator, auto_enumerator
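# Illustrative sketch (hypothetical helper, not part of the module): the
# marker `make_enumerator` builds for ordinal 3 of a lower-alphabetic list in
# 'parens' format; the matching auto-enumerator would be '(#) '.
def _example_next_enumerator():
    ordinal = 3
    enumerator = chr(ordinal + ord('a') - 1)   # 'c'
    assert '(' + enumerator + ') ' == '(c) '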
  1312. def field_marker(self, match, context, next_state):
  1313. """Field list item."""
  1314. field_list = nodes.field_list()
  1315. self.parent += field_list
  1316. field, blank_finish = self.field(match)
  1317. field_list += field
  1318. offset = self.state_machine.line_offset + 1 # next line
  1319. newline_offset, blank_finish = self.nested_list_parse(
  1320. self.state_machine.input_lines[offset:],
  1321. input_offset=self.state_machine.abs_line_offset() + 1,
  1322. node=field_list, initial_state='FieldList',
  1323. blank_finish=blank_finish)
  1324. self.goto_line(newline_offset)
  1325. if not blank_finish:
  1326. self.parent += self.unindent_warning('Field list')
  1327. return [], next_state, []
  1328. def field(self, match):
  1329. name = self.parse_field_marker(match)
  1330. src, srcline = self.state_machine.get_source_and_line()
  1331. lineno = self.state_machine.abs_line_number()
  1332. (indented, indent, line_offset, blank_finish
  1333. ) = self.state_machine.get_first_known_indented(match.end())
  1334. field_node = nodes.field()
  1335. field_node.source = src
  1336. field_node.line = srcline
  1337. name_nodes, name_messages = self.inline_text(name, lineno)
  1338. field_node += nodes.field_name(name, '', *name_nodes)
  1339. field_body = nodes.field_body('\n'.join(indented), *name_messages)
  1340. field_node += field_body
  1341. if indented:
  1342. self.parse_field_body(indented, line_offset, field_body)
  1343. return field_node, blank_finish
  1344. def parse_field_marker(self, match):
  1345. """Extract & return field name from a field marker match."""
  1346. field = match.group()[1:] # strip off leading ':'
  1347. field = field[:field.rfind(':')] # strip off trailing ':' etc.
  1348. return field
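# Illustrative sketch (hypothetical helper, not part of the module): the
# slicing performed by `parse_field_marker` above, applied to the text of a
# field marker match for ':author:'.
def _example_field_marker_name():
    marker = ':author: '                  # text matched by 'field_marker'
    field = marker[1:]                    # strip leading ':'
    field = field[:field.rfind(':')]      # strip trailing ':' and spaces
    assert field == 'author'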
  1349. def parse_field_body(self, indented, offset, node):
  1350. self.nested_parse(indented, input_offset=offset, node=node)
  1351. def option_marker(self, match, context, next_state):
  1352. """Option list item."""
  1353. optionlist = nodes.option_list()
  1354. (optionlist.source, optionlist.line
  1355. ) = self.state_machine.get_source_and_line()
  1356. try:
  1357. listitem, blank_finish = self.option_list_item(match)
  1358. except MarkupError as error:
  1359. # This shouldn't happen; pattern won't match.
  1360. msg = self.reporter.error('Invalid option list marker: %s'
  1361. % error)
  1362. self.parent += msg
  1363. (indented, indent, line_offset, blank_finish
  1364. ) = self.state_machine.get_first_known_indented(match.end())
  1365. elements = self.block_quote(indented, line_offset)
  1366. self.parent += elements
  1367. if not blank_finish:
  1368. self.parent += self.unindent_warning('Option list')
  1369. return [], next_state, []
  1370. self.parent += optionlist
  1371. optionlist += listitem
  1372. offset = self.state_machine.line_offset + 1 # next line
  1373. newline_offset, blank_finish = self.nested_list_parse(
  1374. self.state_machine.input_lines[offset:],
  1375. input_offset=self.state_machine.abs_line_offset() + 1,
  1376. node=optionlist, initial_state='OptionList',
  1377. blank_finish=blank_finish)
  1378. self.goto_line(newline_offset)
  1379. if not blank_finish:
  1380. self.parent += self.unindent_warning('Option list')
  1381. return [], next_state, []
  1382. def option_list_item(self, match):
  1383. offset = self.state_machine.abs_line_offset()
  1384. options = self.parse_option_marker(match)
  1385. (indented, indent, line_offset, blank_finish
  1386. ) = self.state_machine.get_first_known_indented(match.end())
  1387. if not indented: # not an option list item
  1388. self.goto_line(offset)
  1389. raise statemachine.TransitionCorrection('text')
  1390. option_group = nodes.option_group('', *options)
  1391. description = nodes.description('\n'.join(indented))
  1392. option_list_item = nodes.option_list_item('', option_group,
  1393. description)
  1394. if indented:
  1395. self.nested_parse(indented, input_offset=line_offset,
  1396. node=description)
  1397. return option_list_item, blank_finish
  1398. def parse_option_marker(self, match):
  1399. """
1400. Return a list of `nodes.option` and `nodes.option_argument` objects,
  1401. parsed from an option marker match.
  1402. :Exception: `MarkupError` for invalid option markers.
  1403. """
  1404. optlist = []
  1405. optionstrings = match.group().rstrip().split(', ')
  1406. for optionstring in optionstrings:
  1407. tokens = optionstring.split()
  1408. delimiter = ' '
  1409. firstopt = tokens[0].split('=', 1)
  1410. if len(firstopt) > 1:
  1411. # "--opt=value" form
  1412. tokens[:1] = firstopt
  1413. delimiter = '='
  1414. elif (len(tokens[0]) > 2
  1415. and ((tokens[0].startswith('-')
  1416. and not tokens[0].startswith('--'))
  1417. or tokens[0].startswith('+'))):
  1418. # "-ovalue" form
  1419. tokens[:1] = [tokens[0][:2], tokens[0][2:]]
  1420. delimiter = ''
  1421. if len(tokens) > 1 and (tokens[1].startswith('<')
  1422. and tokens[-1].endswith('>')):
  1423. # "-o <value1 value2>" form; join all values into one token
  1424. tokens[1:] = [' '.join(tokens[1:])]
  1425. if 0 < len(tokens) <= 2:
  1426. option = nodes.option(optionstring)
  1427. option += nodes.option_string(tokens[0], tokens[0])
  1428. if len(tokens) > 1:
  1429. option += nodes.option_argument(tokens[1], tokens[1],
  1430. delimiter=delimiter)
  1431. optlist.append(option)
  1432. else:
  1433. raise MarkupError(
  1434. 'wrong number of option tokens (=%s), should be 1 or 2: '
  1435. '"%s"' % (len(tokens), optionstring))
  1436. return optlist
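# Illustrative sketch (hypothetical helper, not part of the module): the three
# option/argument spellings that `parse_option_marker` splits apart.
def _example_option_marker_forms():
    assert '--output=file'.split('=', 1) == ['--output', 'file']   # "--opt=value"
    token = '-ofile'
    assert [token[:2], token[2:]] == ['-o', 'file']                # "-ovalue"
    tokens = '-o <val1 val2>'.split()
    assert [tokens[0], ' '.join(tokens[1:])] == ['-o', '<val1 val2>']  # "-o <...>"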
  1437. def doctest(self, match, context, next_state):
  1438. data = '\n'.join(self.state_machine.get_text_block())
  1439. # TODO: prepend class value ['pycon'] (Python Console)
  1440. # parse with `directives.body.CodeBlock` (returns literal-block
  1441. # with class "code" and syntax highlight markup).
  1442. self.parent += nodes.doctest_block(data, data)
  1443. return [], next_state, []
  1444. def line_block(self, match, context, next_state):
  1445. """First line of a line block."""
  1446. block = nodes.line_block()
  1447. self.parent += block
  1448. lineno = self.state_machine.abs_line_number()
  1449. line, messages, blank_finish = self.line_block_line(match, lineno)
  1450. block += line
  1451. self.parent += messages
  1452. if not blank_finish:
  1453. offset = self.state_machine.line_offset + 1 # next line
  1454. new_line_offset, blank_finish = self.nested_list_parse(
  1455. self.state_machine.input_lines[offset:],
  1456. input_offset=self.state_machine.abs_line_offset() + 1,
  1457. node=block, initial_state='LineBlock',
  1458. blank_finish=0)
  1459. self.goto_line(new_line_offset)
  1460. if not blank_finish:
  1461. self.parent += self.reporter.warning(
  1462. 'Line block ends without a blank line.',
  1463. line=lineno+1)
  1464. if len(block):
  1465. if block[0].indent is None:
  1466. block[0].indent = 0
  1467. self.nest_line_block_lines(block)
  1468. return [], next_state, []
  1469. def line_block_line(self, match, lineno):
  1470. """Return one line element of a line_block."""
  1471. (indented, indent, line_offset, blank_finish
  1472. ) = self.state_machine.get_first_known_indented(match.end(),
  1473. until_blank=True)
  1474. text = '\n'.join(indented)
  1475. text_nodes, messages = self.inline_text(text, lineno)
  1476. line = nodes.line(text, '', *text_nodes)
  1477. if match.string.rstrip() != '|': # not empty
  1478. line.indent = len(match.group(1)) - 1
  1479. return line, messages, blank_finish
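# Illustrative sketch (hypothetical helper, not part of the module): how the
# indent of a line_block line is derived from the 'line_block' pattern match
# (group 1 holds the spaces after "|").
def _example_line_block_indent():
    import re
    match = re.match(r'\|( +|$)', '|   indented line')
    assert len(match.group(1)) - 1 == 2   # two extra spaces of indent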
  1480. def nest_line_block_lines(self, block):
  1481. for index in range(1, len(block)):
  1482. if getattr(block[index], 'indent', None) is None:
  1483. block[index].indent = block[index - 1].indent
  1484. self.nest_line_block_segment(block)
  1485. def nest_line_block_segment(self, block):
  1486. indents = [item.indent for item in block]
  1487. least = min(indents)
  1488. new_items = []
  1489. new_block = nodes.line_block()
  1490. for item in block:
  1491. if item.indent > least:
  1492. new_block.append(item)
  1493. else:
  1494. if len(new_block):
  1495. self.nest_line_block_segment(new_block)
  1496. new_items.append(new_block)
  1497. new_block = nodes.line_block()
  1498. new_items.append(item)
  1499. if len(new_block):
  1500. self.nest_line_block_segment(new_block)
  1501. new_items.append(new_block)
  1502. block[:] = new_items
  1503. def grid_table_top(self, match, context, next_state):
  1504. """Top border of a full table."""
  1505. return self.table_top(match, context, next_state,
  1506. self.isolate_grid_table,
  1507. tableparser.GridTableParser)
  1508. def simple_table_top(self, match, context, next_state):
  1509. """Top border of a simple table."""
  1510. return self.table_top(match, context, next_state,
  1511. self.isolate_simple_table,
  1512. tableparser.SimpleTableParser)
  1513. def table_top(self, match, context, next_state,
  1514. isolate_function, parser_class):
  1515. """Top border of a generic table."""
  1516. nodelist, blank_finish = self.table(isolate_function, parser_class)
  1517. self.parent += nodelist
  1518. if not blank_finish:
  1519. msg = self.reporter.warning(
  1520. 'Blank line required after table.',
  1521. line=self.state_machine.abs_line_number()+1)
  1522. self.parent += msg
  1523. return [], next_state, []
  1524. def table(self, isolate_function, parser_class):
  1525. """Parse a table."""
  1526. block, messages, blank_finish = isolate_function()
  1527. if block:
  1528. try:
  1529. parser = parser_class()
  1530. tabledata = parser.parse(block)
  1531. tableline = (self.state_machine.abs_line_number() - len(block)
  1532. + 1)
  1533. table = self.build_table(tabledata, tableline)
  1534. nodelist = [table] + messages
  1535. except tableparser.TableMarkupError as err:
  1536. nodelist = self.malformed_table(block, ' '.join(err.args),
  1537. offset=err.offset) + messages
  1538. else:
  1539. nodelist = messages
  1540. return nodelist, blank_finish
  1541. def isolate_grid_table(self):
  1542. messages = []
  1543. blank_finish = 1
  1544. try:
  1545. block = self.state_machine.get_text_block(flush_left=True)
  1546. except statemachine.UnexpectedIndentationError as err:
  1547. block, src, srcline = err.args
  1548. messages.append(self.reporter.error('Unexpected indentation.',
  1549. source=src, line=srcline))
  1550. blank_finish = 0
  1551. block.disconnect()
  1552. # for East Asian chars:
  1553. block.pad_double_width(self.double_width_pad_char)
  1554. width = len(block[0].strip())
  1555. for i in range(len(block)):
  1556. block[i] = block[i].strip()
  1557. if block[i][0] not in '+|': # check left edge
  1558. blank_finish = 0
  1559. self.state_machine.previous_line(len(block) - i)
  1560. del block[i:]
  1561. break
  1562. if not self.grid_table_top_pat.match(block[-1]): # find bottom
  1563. blank_finish = 0
  1564. # from second-last to third line of table:
  1565. for i in range(len(block) - 2, 1, -1):
  1566. if self.grid_table_top_pat.match(block[i]):
  1567. self.state_machine.previous_line(len(block) - i + 1)
  1568. del block[i+1:]
  1569. break
  1570. else:
  1571. messages.extend(self.malformed_table(block))
  1572. return [], messages, blank_finish
  1573. for i in range(len(block)): # check right edge
  1574. if len(block[i]) != width or block[i][-1] not in '+|':
  1575. messages.extend(self.malformed_table(block))
  1576. return [], messages, blank_finish
  1577. return block, messages, blank_finish
  1578. def isolate_simple_table(self):
  1579. start = self.state_machine.line_offset
  1580. lines = self.state_machine.input_lines
  1581. limit = len(lines) - 1
  1582. toplen = len(lines[start].strip())
  1583. pattern_match = self.simple_table_border_pat.match
  1584. found = 0
  1585. found_at = None
  1586. i = start + 1
  1587. while i <= limit:
  1588. line = lines[i]
  1589. match = pattern_match(line)
  1590. if match:
  1591. if len(line.strip()) != toplen:
  1592. self.state_machine.next_line(i - start)
  1593. messages = self.malformed_table(
  1594. lines[start:i+1], 'Bottom/header table border does '
  1595. 'not match top border.')
  1596. return [], messages, i == limit or not lines[i+1].strip()
  1597. found += 1
  1598. found_at = i
  1599. if found == 2 or i == limit or not lines[i+1].strip():
  1600. end = i
  1601. break
  1602. i += 1
  1603. else: # reached end of input_lines
  1604. if found:
  1605. extra = ' or no blank line after table bottom'
  1606. self.state_machine.next_line(found_at - start)
  1607. block = lines[start:found_at+1]
  1608. else:
  1609. extra = ''
  1610. self.state_machine.next_line(i - start - 1)
  1611. block = lines[start:]
  1612. messages = self.malformed_table(
  1613. block, 'No bottom table border found%s.' % extra)
  1614. return [], messages, not extra
  1615. self.state_machine.next_line(end - start)
  1616. block = lines[start:end+1]
  1617. # for East Asian chars:
  1618. block.pad_double_width(self.double_width_pad_char)
  1619. return block, [], end == limit or not lines[end+1].strip()
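# Illustrative sketch (hypothetical helper, not part of the module): border
# lines recognized while isolating a simple table; the pattern strings are
# copied from `simple_table_top_pat` and `simple_table_border_pat` above.
def _example_simple_table_borders():
    import re
    top = re.compile('=+( +=+)+ *$')
    border = re.compile('=+[ =]*$')
    assert top.match('======  ======')    # top border needs two or more columns
    assert border.match('======')         # bottom/header border may be one run
    assert not top.match('======')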
  1620. def malformed_table(self, block, detail='', offset=0):
  1621. block.replace(self.double_width_pad_char, '')
  1622. data = '\n'.join(block)
  1623. message = 'Malformed table.'
  1624. startline = self.state_machine.abs_line_number() - len(block) + 1
  1625. if detail:
  1626. message += '\n' + detail
  1627. error = self.reporter.error(message, nodes.literal_block(data, data),
  1628. line=startline+offset)
  1629. return [error]
  1630. def build_table(self, tabledata, tableline, stub_columns=0, widths=None):
  1631. colwidths, headrows, bodyrows = tabledata
  1632. table = nodes.table()
  1633. if widths == 'auto':
  1634. table['classes'] += ['colwidths-auto']
  1635. elif widths: # "grid" or list of integers
  1636. table['classes'] += ['colwidths-given']
  1637. tgroup = nodes.tgroup(cols=len(colwidths))
  1638. table += tgroup
  1639. for colwidth in colwidths:
  1640. colspec = nodes.colspec(colwidth=colwidth)
  1641. if stub_columns:
  1642. colspec.attributes['stub'] = 1
  1643. stub_columns -= 1
  1644. tgroup += colspec
  1645. if headrows:
  1646. thead = nodes.thead()
  1647. tgroup += thead
  1648. for row in headrows:
  1649. thead += self.build_table_row(row, tableline)
  1650. tbody = nodes.tbody()
  1651. tgroup += tbody
  1652. for row in bodyrows:
  1653. tbody += self.build_table_row(row, tableline)
  1654. return table
  1655. def build_table_row(self, rowdata, tableline):
  1656. row = nodes.row()
  1657. for cell in rowdata:
  1658. if cell is None:
  1659. continue
  1660. morerows, morecols, offset, cellblock = cell
  1661. attributes = {}
  1662. if morerows:
  1663. attributes['morerows'] = morerows
  1664. if morecols:
  1665. attributes['morecols'] = morecols
  1666. entry = nodes.entry(**attributes)
  1667. row += entry
  1668. if ''.join(cellblock):
  1669. self.nested_parse(cellblock, input_offset=tableline+offset,
  1670. node=entry)
  1671. return row
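# Illustrative sketch (hypothetical helper, not part of the module): the node
# skeleton `build_table` assembles for a one-column, body-only table, using
# the same `nodes` module as the surrounding code.
def _example_minimal_table_tree():
    table = nodes.table()
    tgroup = nodes.tgroup(cols=1)
    table += tgroup
    tgroup += nodes.colspec(colwidth=10)
    tbody = nodes.tbody()
    tgroup += tbody
    row = nodes.row()
    tbody += row
    row += nodes.entry()
    return table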
  1672. explicit = Struct()
  1673. """Patterns and constants used for explicit markup recognition."""
  1674. explicit.patterns = Struct(
  1675. target=re.compile(r"""
  1676. (
  1677. _ # anonymous target
  1678. | # *OR*
  1679. (?!_) # no underscore at the beginning
  1680. (?P<quote>`?) # optional open quote
  1681. (?![ `]) # first char. not space or
  1682. # backquote
  1683. (?P<name> # reference name
  1684. .+?
  1685. )
  1686. %(non_whitespace_escape_before)s
  1687. (?P=quote) # close quote if open quote used
  1688. )
  1689. (?<!(?<!\x00):) # no unescaped colon at end
  1690. %(non_whitespace_escape_before)s
  1691. [ ]? # optional space
  1692. : # end of reference name
  1693. ([ ]+|$) # followed by whitespace
  1694. """ % vars(Inliner), re.VERBOSE),
  1695. reference=re.compile(r"""
  1696. (
  1697. (?P<simple>%(simplename)s)_
  1698. | # *OR*
  1699. ` # open backquote
  1700. (?![ ]) # not space
  1701. (?P<phrase>.+?) # hyperlink phrase
  1702. %(non_whitespace_escape_before)s
  1703. `_ # close backquote,
  1704. # reference mark
  1705. )
  1706. $ # end of string
  1707. """ % vars(Inliner), re.VERBOSE),
  1708. substitution=re.compile(r"""
  1709. (
  1710. (?![ ]) # first char. not space
  1711. (?P<name>.+?) # substitution text
  1712. %(non_whitespace_escape_before)s
  1713. \| # close delimiter
  1714. )
  1715. ([ ]+|$) # followed by whitespace
  1716. """ % vars(Inliner),
  1717. re.VERBOSE),)
  1718. def footnote(self, match):
  1719. src, srcline = self.state_machine.get_source_and_line()
  1720. (indented, indent, offset, blank_finish
  1721. ) = self.state_machine.get_first_known_indented(match.end())
  1722. label = match.group(1)
  1723. name = normalize_name(label)
  1724. footnote = nodes.footnote('\n'.join(indented))
  1725. footnote.source = src
  1726. footnote.line = srcline
  1727. if name[0] == '#': # auto-numbered
  1728. name = name[1:] # autonumber label
  1729. footnote['auto'] = 1
  1730. if name:
  1731. footnote['names'].append(name)
  1732. self.document.note_autofootnote(footnote)
  1733. elif name == '*': # auto-symbol
  1734. name = ''
  1735. footnote['auto'] = '*'
  1736. self.document.note_symbol_footnote(footnote)
  1737. else: # manually numbered
  1738. footnote += nodes.label('', label)
  1739. footnote['names'].append(name)
  1740. self.document.note_footnote(footnote)
  1741. if name:
  1742. self.document.note_explicit_target(footnote, footnote)
  1743. else:
  1744. self.document.set_id(footnote, footnote)
  1745. if indented:
  1746. self.nested_parse(indented, input_offset=offset, node=footnote)
  1747. return [footnote], blank_finish
  1748. def citation(self, match):
  1749. src, srcline = self.state_machine.get_source_and_line()
  1750. (indented, indent, offset, blank_finish
  1751. ) = self.state_machine.get_first_known_indented(match.end())
  1752. label = match.group(1)
  1753. name = normalize_name(label)
  1754. citation = nodes.citation('\n'.join(indented))
  1755. citation.source = src
  1756. citation.line = srcline
  1757. citation += nodes.label('', label)
  1758. citation['names'].append(name)
  1759. self.document.note_citation(citation)
  1760. self.document.note_explicit_target(citation, citation)
  1761. if indented:
  1762. self.nested_parse(indented, input_offset=offset, node=citation)
  1763. return [citation], blank_finish
  1764. def hyperlink_target(self, match):
  1765. pattern = self.explicit.patterns.target
  1766. lineno = self.state_machine.abs_line_number()
  1767. (block, indent, offset, blank_finish
  1768. ) = self.state_machine.get_first_known_indented(
  1769. match.end(), until_blank=True, strip_indent=False)
  1770. blocktext = match.string[:match.end()] + '\n'.join(block)
  1771. block = [escape2null(line) for line in block]
  1772. escaped = block[0]
  1773. blockindex = 0
  1774. while True:
  1775. targetmatch = pattern.match(escaped)
  1776. if targetmatch:
  1777. break
  1778. blockindex += 1
  1779. try:
  1780. escaped += block[blockindex]
  1781. except IndexError:
  1782. raise MarkupError('malformed hyperlink target.')
  1783. del block[:blockindex]
  1784. block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
  1785. target = self.make_target(block, blocktext, lineno,
  1786. targetmatch.group('name'))
  1787. return [target], blank_finish
  1788. def make_target(self, block, block_text, lineno, target_name):
  1789. target_type, data = self.parse_target(block, block_text, lineno)
  1790. if target_type == 'refname':
  1791. target = nodes.target(block_text, '', refname=normalize_name(data))
  1792. target.indirect_reference_name = data
  1793. self.add_target(target_name, '', target, lineno)
  1794. self.document.note_indirect_target(target)
  1795. return target
  1796. elif target_type == 'refuri':
  1797. target = nodes.target(block_text, '')
  1798. self.add_target(target_name, data, target, lineno)
  1799. return target
  1800. else:
  1801. return data
  1802. def parse_target(self, block, block_text, lineno):
  1803. """
  1804. Determine the type of reference of a target.
  1805. :Return: A 2-tuple, one of:
  1806. - 'refname' and the indirect reference name
  1807. - 'refuri' and the URI
  1808. - 'malformed' and a system_message node
  1809. """
  1810. if block and block[-1].strip()[-1:] == '_': # possible indirect target
  1811. reference = ' '.join(line.strip() for line in block)
  1812. refname = self.is_reference(reference)
  1813. if refname:
  1814. return 'refname', refname
  1815. ref_parts = split_escaped_whitespace(' '.join(block))
  1816. reference = ' '.join(''.join(unescape(part).split())
  1817. for part in ref_parts)
  1818. return 'refuri', reference
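# Illustrative sketch (hypothetical helper, not part of the module): the
# whitespace normalization `parse_target` applies to a multi-line URI block
# before returning it as a 'refuri' (escaped-whitespace handling omitted).
def _example_target_uri_join():
    block = ['https://docutils.', '    sourceforge.io/']
    reference = ''.join(' '.join(block).split())
    assert reference == 'https://docutils.sourceforge.io/'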
  1819. def is_reference(self, reference):
  1820. match = self.explicit.patterns.reference.match(
  1821. whitespace_normalize_name(reference))
  1822. if not match:
  1823. return None
  1824. return unescape(match.group('simple') or match.group('phrase'))
  1825. def add_target(self, targetname, refuri, target, lineno):
  1826. target.line = lineno
  1827. if targetname:
  1828. name = normalize_name(unescape(targetname))
  1829. target['names'].append(name)
  1830. if refuri:
  1831. uri = self.inliner.adjust_uri(refuri)
  1832. if uri:
  1833. target['refuri'] = uri
  1834. else:
  1835. raise ApplicationError('problem with URI: %r' % refuri)
  1836. self.document.note_explicit_target(target, self.parent)
  1837. else: # anonymous target
  1838. if refuri:
  1839. target['refuri'] = refuri
  1840. target['anonymous'] = 1
  1841. self.document.note_anonymous_target(target)
  1842. def substitution_def(self, match):
  1843. pattern = self.explicit.patterns.substitution
  1844. src, srcline = self.state_machine.get_source_and_line()
  1845. (block, indent, offset, blank_finish
  1846. ) = self.state_machine.get_first_known_indented(match.end(),
  1847. strip_indent=False)
  1848. blocktext = (match.string[:match.end()] + '\n'.join(block))
  1849. block.disconnect()
  1850. escaped = escape2null(block[0].rstrip())
  1851. blockindex = 0
  1852. while True:
  1853. subdefmatch = pattern.match(escaped)
  1854. if subdefmatch:
  1855. break
  1856. blockindex += 1
  1857. try:
  1858. escaped = escaped + ' ' + escape2null(
  1859. block[blockindex].strip())
  1860. except IndexError:
  1861. raise MarkupError('malformed substitution definition.')
  1862. del block[:blockindex] # strip out the substitution marker
  1863. start = subdefmatch.end()-len(escaped)-1
  1864. block[0] = (block[0].strip() + ' ')[start:-1]
  1865. if not block[0]:
  1866. del block[0]
  1867. offset += 1
  1868. while block and not block[-1].strip():
  1869. block.pop()
  1870. subname = subdefmatch.group('name')
  1871. substitution_node = nodes.substitution_definition(blocktext)
  1872. substitution_node.source = src
  1873. substitution_node.line = srcline
  1874. if not block:
  1875. msg = self.reporter.warning(
  1876. 'Substitution definition "%s" missing contents.' % subname,
  1877. nodes.literal_block(blocktext, blocktext),
  1878. source=src, line=srcline)
  1879. return [msg], blank_finish
  1880. block[0] = block[0].strip()
  1881. substitution_node['names'].append(
  1882. nodes.whitespace_normalize_name(subname))
  1883. new_abs_offset, blank_finish = self.nested_list_parse(
  1884. block, input_offset=offset, node=substitution_node,
  1885. initial_state='SubstitutionDef', blank_finish=blank_finish)
  1886. i = 0
  1887. for node in substitution_node[:]:
  1888. if not (isinstance(node, nodes.Inline)
  1889. or isinstance(node, nodes.Text)):
  1890. self.parent += substitution_node[i]
  1891. del substitution_node[i]
  1892. else:
  1893. i += 1
  1894. for node in substitution_node.findall(nodes.Element):
  1895. if self.disallowed_inside_substitution_definitions(node):
  1896. pformat = nodes.literal_block('', node.pformat().rstrip())
  1897. msg = self.reporter.error(
  1898. 'Substitution definition contains illegal element <%s>:'
  1899. % node.tagname,
  1900. pformat, nodes.literal_block(blocktext, blocktext),
  1901. source=src, line=srcline)
  1902. return [msg], blank_finish
  1903. if len(substitution_node) == 0:
  1904. msg = self.reporter.warning(
  1905. 'Substitution definition "%s" empty or invalid.' % subname,
  1906. nodes.literal_block(blocktext, blocktext),
  1907. source=src, line=srcline)
  1908. return [msg], blank_finish
  1909. self.document.note_substitution_def(
  1910. substitution_node, subname, self.parent)
  1911. return [substitution_node], blank_finish
  1912. def disallowed_inside_substitution_definitions(self, node):
  1913. if (node['ids']
  1914. or isinstance(node, nodes.reference) and node.get('anonymous')
  1915. or isinstance(node, nodes.footnote_reference) and node.get('auto')): # noqa: E501
  1916. return True
  1917. else:
  1918. return False
  1919. def directive(self, match, **option_presets):
  1920. """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
  1921. type_name = match.group(1)
  1922. directive_class, messages = directives.directive(
  1923. type_name, self.memo.language, self.document)
  1924. self.parent += messages
  1925. if directive_class:
  1926. return self.run_directive(
  1927. directive_class, match, type_name, option_presets)
  1928. else:
  1929. return self.unknown_directive(type_name)
  1930. def run_directive(self, directive, match, type_name, option_presets):
  1931. """
  1932. Parse a directive then run its directive function.
  1933. Parameters:
  1934. - `directive`: The class implementing the directive. Must be
  1935. a subclass of `rst.Directive`.
  1936. - `match`: A regular expression match object which matched the first
  1937. line of the directive.
  1938. - `type_name`: The directive name, as used in the source text.
  1939. - `option_presets`: A dictionary of preset options, defaults for the
  1940. directive options. Currently, only an "alt" option is passed by
  1941. substitution definitions (value: the substitution name), which may
  1942. be used by an embedded image directive.
  1943. Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
  1944. """
  1945. if isinstance(directive, (FunctionType, MethodType)):
  1946. from docutils.parsers.rst import convert_directive_function
  1947. directive = convert_directive_function(directive)
  1948. lineno = self.state_machine.abs_line_number()
  1949. initial_line_offset = self.state_machine.line_offset
  1950. (indented, indent, line_offset, blank_finish
  1951. ) = self.state_machine.get_first_known_indented(match.end(),
  1952. strip_top=0)
  1953. block_text = '\n'.join(self.state_machine.input_lines[
  1954. initial_line_offset : self.state_machine.line_offset + 1]) # noqa: E203,E501
  1955. try:
  1956. arguments, options, content, content_offset = (
  1957. self.parse_directive_block(indented, line_offset,
  1958. directive, option_presets))
  1959. except MarkupError as detail:
  1960. error = self.reporter.error(
  1961. 'Error in "%s" directive:\n%s.' % (type_name,
  1962. ' '.join(detail.args)),
  1963. nodes.literal_block(block_text, block_text), line=lineno)
  1964. return [error], blank_finish
  1965. directive_instance = directive(
  1966. type_name, arguments, options, content, lineno,
  1967. content_offset, block_text, self, self.state_machine)
  1968. try:
  1969. result = directive_instance.run()
  1970. except docutils.parsers.rst.DirectiveError as error:
  1971. msg_node = self.reporter.system_message(error.level, error.msg,
  1972. line=lineno)
  1973. msg_node += nodes.literal_block(block_text, block_text)
  1974. result = [msg_node]
  1975. assert isinstance(result, list), \
  1976. 'Directive "%s" must return a list of nodes.' % type_name
  1977. for i in range(len(result)):
  1978. assert isinstance(result[i], nodes.Node), \
  1979. ('Directive "%s" returned non-Node object (index %s): %r'
  1980. % (type_name, i, result[i]))
  1981. return (result,
  1982. blank_finish or self.state_machine.is_next_line_blank())
  1983. def parse_directive_block(self, indented, line_offset, directive,
  1984. option_presets):
  1985. option_spec = directive.option_spec
  1986. has_content = directive.has_content
  1987. if indented and not indented[0].strip():
  1988. indented.trim_start()
  1989. line_offset += 1
  1990. while indented and not indented[-1].strip():
  1991. indented.trim_end()
  1992. if indented and (directive.required_arguments
  1993. or directive.optional_arguments
  1994. or option_spec):
  1995. for i, line in enumerate(indented):
  1996. if not line.strip():
  1997. break
  1998. else:
  1999. i += 1
  2000. arg_block = indented[:i]
  2001. content = indented[i+1:]
  2002. content_offset = line_offset + i + 1
  2003. else:
  2004. content = indented
  2005. content_offset = line_offset
  2006. arg_block = []
  2007. if option_spec:
  2008. options, arg_block = self.parse_directive_options(
  2009. option_presets, option_spec, arg_block)
  2010. else:
  2011. options = {}
  2012. if arg_block and not (directive.required_arguments
  2013. or directive.optional_arguments):
  2014. content = arg_block + indented[i:]
  2015. content_offset = line_offset
  2016. arg_block = []
  2017. while content and not content[0].strip():
  2018. content.trim_start()
  2019. content_offset += 1
  2020. if directive.required_arguments or directive.optional_arguments:
  2021. arguments = self.parse_directive_arguments(
  2022. directive, arg_block)
  2023. else:
  2024. arguments = []
  2025. if content and not has_content:
  2026. raise MarkupError('no content permitted')
  2027. return arguments, options, content, content_offset
  2028. def parse_directive_options(self, option_presets, option_spec, arg_block):
  2029. options = option_presets.copy()
  2030. for i, line in enumerate(arg_block):
  2031. if re.match(Body.patterns['field_marker'], line):
  2032. opt_block = arg_block[i:]
  2033. arg_block = arg_block[:i]
  2034. break
  2035. else:
  2036. opt_block = []
  2037. if opt_block:
  2038. success, data = self.parse_extension_options(option_spec,
  2039. opt_block)
  2040. if success: # data is a dict of options
  2041. options.update(data)
  2042. else: # data is an error string
  2043. raise MarkupError(data)
  2044. return options, arg_block
  2045. def parse_directive_arguments(self, directive, arg_block):
  2046. required = directive.required_arguments
  2047. optional = directive.optional_arguments
  2048. arg_text = '\n'.join(arg_block)
  2049. arguments = arg_text.split()
  2050. if len(arguments) < required:
  2051. raise MarkupError('%s argument(s) required, %s supplied'
  2052. % (required, len(arguments)))
  2053. elif len(arguments) > required + optional:
  2054. if directive.final_argument_whitespace:
  2055. arguments = arg_text.split(None, required + optional - 1)
  2056. else:
  2057. raise MarkupError(
  2058. 'maximum %s argument(s) allowed, %s supplied'
  2059. % (required + optional, len(arguments)))
  2060. return arguments
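# Illustrative sketch (hypothetical helper, not part of the module): the split
# performed by `parse_directive_arguments` for a directive with one required
# argument, no optional arguments, and `final_argument_whitespace` set, so the
# whole argument block becomes a single argument.
def _example_directive_argument_split():
    arg_text = 'A multi word title'
    required, optional = 1, 0
    assert arg_text.split(None, required + optional - 1) == ['A multi word title']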
  2061. def parse_extension_options(self, option_spec, datalines):
  2062. """
  2063. Parse `datalines` for a field list containing extension options
  2064. matching `option_spec`.
  2065. :Parameters:
  2066. - `option_spec`: a mapping of option name to conversion
  2067. function, which should raise an exception on bad input.
  2068. - `datalines`: a list of input strings.
  2069. :Return:
  2070. - Success value, 1 or 0.
  2071. - An option dictionary on success, an error string on failure.
  2072. """
  2073. node = nodes.field_list()
  2074. newline_offset, blank_finish = self.nested_list_parse(
  2075. datalines, 0, node, initial_state='ExtensionOptions',
  2076. blank_finish=True)
  2077. if newline_offset != len(datalines): # incomplete parse of block
  2078. return 0, 'invalid option block'
  2079. try:
  2080. options = utils.extract_extension_options(node, option_spec)
  2081. except KeyError as detail:
  2082. return 0, 'unknown option: "%s"' % detail.args[0]
  2083. except (ValueError, TypeError) as detail:
  2084. return 0, 'invalid option value: %s' % ' '.join(detail.args)
  2085. except utils.ExtensionOptionError as detail:
  2086. return 0, 'invalid option data: %s' % ' '.join(detail.args)
  2087. if blank_finish:
  2088. return 1, options
  2089. else:
  2090. return 0, 'option data incompletely parsed'
  2091. def unknown_directive(self, type_name):
  2092. lineno = self.state_machine.abs_line_number()
  2093. (indented, indent, offset, blank_finish
  2094. ) = self.state_machine.get_first_known_indented(0, strip_indent=False)
  2095. text = '\n'.join(indented)
  2096. error = self.reporter.error('Unknown directive type "%s".' % type_name,
  2097. nodes.literal_block(text, text),
  2098. line=lineno)
  2099. return [error], blank_finish
  2100. def comment(self, match):
  2101. if self.state_machine.is_next_line_blank():
  2102. first_comment_line = match.string[match.end():]
  2103. if not first_comment_line.strip(): # empty comment
  2104. return [nodes.comment()], True # "A tiny but practical wart."
  2105. if first_comment_line.startswith('end of inclusion from "'):
  2106. # cf. parsers.rst.directives.misc.Include
  2107. self.document.include_log.pop()
  2108. return [], True
  2109. (indented, indent, offset, blank_finish
  2110. ) = self.state_machine.get_first_known_indented(match.end())
  2111. while indented and not indented[-1].strip():
  2112. indented.trim_end()
  2113. text = '\n'.join(indented)
  2114. return [nodes.comment(text, text)], blank_finish
  2115. explicit.constructs = [
  2116. (footnote,
  2117. re.compile(r"""
  2118. \.\.[ ]+ # explicit markup start
  2119. \[
  2120. ( # footnote label:
  2121. [0-9]+ # manually numbered footnote
  2122. | # *OR*
  2123. \# # anonymous auto-numbered footnote
  2124. | # *OR*
2125. \#%s # auto-numbered, labeled footnote
  2126. | # *OR*
  2127. \* # auto-symbol footnote
  2128. )
  2129. \]
  2130. ([ ]+|$) # whitespace or end of line
  2131. """ % Inliner.simplename, re.VERBOSE)),
  2132. (citation,
  2133. re.compile(r"""
  2134. \.\.[ ]+ # explicit markup start
  2135. \[(%s)\] # citation label
  2136. ([ ]+|$) # whitespace or end of line
  2137. """ % Inliner.simplename, re.VERBOSE)),
  2138. (hyperlink_target,
  2139. re.compile(r"""
  2140. \.\.[ ]+ # explicit markup start
  2141. _ # target indicator
  2142. (?![ ]|$) # first char. not space or EOL
  2143. """, re.VERBOSE)),
  2144. (substitution_def,
  2145. re.compile(r"""
  2146. \.\.[ ]+ # explicit markup start
  2147. \| # substitution indicator
  2148. (?![ ]|$) # first char. not space or EOL
  2149. """, re.VERBOSE)),
  2150. (directive,
  2151. re.compile(r"""
  2152. \.\.[ ]+ # explicit markup start
  2153. (%s) # directive name
  2154. [ ]? # optional space
  2155. :: # directive delimiter
  2156. ([ ]+|$) # whitespace or end of line
  2157. """ % Inliner.simplename, re.VERBOSE))]
  2158. def explicit_markup(self, match, context, next_state):
  2159. """Footnotes, hyperlink targets, directives, comments."""
  2160. nodelist, blank_finish = self.explicit_construct(match)
  2161. self.parent += nodelist
  2162. self.explicit_list(blank_finish)
  2163. return [], next_state, []
  2164. def explicit_construct(self, match):
  2165. """Determine which explicit construct this is, parse & return it."""
  2166. errors = []
  2167. for method, pattern in self.explicit.constructs:
  2168. expmatch = pattern.match(match.string)
  2169. if expmatch:
  2170. try:
  2171. return method(self, expmatch)
  2172. except MarkupError as error:
  2173. lineno = self.state_machine.abs_line_number()
  2174. message = ' '.join(error.args)
  2175. errors.append(self.reporter.warning(message, line=lineno))
  2176. break
  2177. nodelist, blank_finish = self.comment(match)
  2178. return nodelist + errors, blank_finish
  2179. def explicit_list(self, blank_finish):
  2180. """
  2181. Create a nested state machine for a series of explicit markup
  2182. constructs (including anonymous hyperlink targets).
  2183. """
  2184. offset = self.state_machine.line_offset + 1 # next line
  2185. newline_offset, blank_finish = self.nested_list_parse(
  2186. self.state_machine.input_lines[offset:],
  2187. input_offset=self.state_machine.abs_line_offset() + 1,
  2188. node=self.parent, initial_state='Explicit',
  2189. blank_finish=blank_finish,
  2190. match_titles=self.state_machine.match_titles)
  2191. self.goto_line(newline_offset)
  2192. if not blank_finish:
  2193. self.parent += self.unindent_warning('Explicit markup')
  2194. def anonymous(self, match, context, next_state):
  2195. """Anonymous hyperlink targets."""
  2196. nodelist, blank_finish = self.anonymous_target(match)
  2197. self.parent += nodelist
  2198. self.explicit_list(blank_finish)
  2199. return [], next_state, []
  2200. def anonymous_target(self, match):
  2201. lineno = self.state_machine.abs_line_number()
  2202. (block, indent, offset, blank_finish
  2203. ) = self.state_machine.get_first_known_indented(match.end(),
  2204. until_blank=True)
  2205. blocktext = match.string[:match.end()] + '\n'.join(block)
  2206. block = [escape2null(line) for line in block]
  2207. target = self.make_target(block, blocktext, lineno, '')
  2208. return [target], blank_finish
  2209. def line(self, match, context, next_state):
  2210. """Section title overline or transition marker."""
  2211. if self.state_machine.match_titles:
  2212. return [match.string], 'Line', []
  2213. elif match.string.strip() == '::':
  2214. raise statemachine.TransitionCorrection('text')
  2215. elif len(match.string.strip()) < 4:
  2216. msg = self.reporter.info(
  2217. 'Unexpected possible title overline or transition.\n'
  2218. "Treating it as ordinary text because it's so short.",
  2219. line=self.state_machine.abs_line_number())
  2220. self.parent += msg
  2221. raise statemachine.TransitionCorrection('text')
  2222. else:
  2223. blocktext = self.state_machine.line
  2224. msg = self.reporter.severe(
  2225. 'Unexpected section title or transition.',
  2226. nodes.literal_block(blocktext, blocktext),
  2227. line=self.state_machine.abs_line_number())
  2228. self.parent += msg
  2229. return [], next_state, []
  2230. def text(self, match, context, next_state):
  2231. """Titles, definition lists, paragraphs."""
  2232. return [match.string], 'Text', []
  2233. class RFC2822Body(Body):
  2234. """
  2235. RFC2822 headers are only valid as the first constructs in documents. As
  2236. soon as anything else appears, the `Body` state should take over.
  2237. """
  2238. patterns = Body.patterns.copy() # can't modify the original
  2239. patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
  2240. initial_transitions = [(name, 'Body')
  2241. for name in Body.initial_transitions]
  2242. initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text'
  2243. def rfc2822(self, match, context, next_state):
  2244. """RFC2822-style field list item."""
  2245. fieldlist = nodes.field_list(classes=['rfc2822'])
  2246. self.parent += fieldlist
  2247. field, blank_finish = self.rfc2822_field(match)
  2248. fieldlist += field
  2249. offset = self.state_machine.line_offset + 1 # next line
  2250. newline_offset, blank_finish = self.nested_list_parse(
  2251. self.state_machine.input_lines[offset:],
  2252. input_offset=self.state_machine.abs_line_offset() + 1,
  2253. node=fieldlist, initial_state='RFC2822List',
  2254. blank_finish=blank_finish)
  2255. self.goto_line(newline_offset)
  2256. if not blank_finish:
  2257. self.parent += self.unindent_warning(
  2258. 'RFC2822-style field list')
  2259. return [], next_state, []
  2260. def rfc2822_field(self, match):
  2261. name = match.string[:match.string.find(':')]
  2262. (indented, indent, line_offset, blank_finish
  2263. ) = self.state_machine.get_first_known_indented(match.end(),
  2264. until_blank=True)
  2265. fieldnode = nodes.field()
  2266. fieldnode += nodes.field_name(name, name)
  2267. fieldbody = nodes.field_body('\n'.join(indented))
  2268. fieldnode += fieldbody
  2269. if indented:
  2270. self.nested_parse(indented, input_offset=line_offset,
  2271. node=fieldbody)
  2272. return fieldnode, blank_finish
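# Illustrative sketch (hypothetical helper, not part of the module): lines
# accepted by the 'rfc2822' pattern above versus an ordinary field marker.
def _example_rfc2822_marker():
    import re
    pattern = re.compile(r'[!-9;-~]+:( +|$)')
    assert pattern.match('Author: J. Doe')
    assert not pattern.match(':author: J. Doe')   # ordinary field list marker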
  2273. class SpecializedBody(Body):
  2274. """
  2275. Superclass for second and subsequent compound element members. Compound
  2276. elements are lists and list-like constructs.
  2277. All transition methods are disabled (redefined as `invalid_input`).
  2278. Override individual methods in subclasses to re-enable.
  2279. For example, once an initial bullet list item, say, is recognized, the
  2280. `BulletList` subclass takes over, with a "bullet_list" node as its
  2281. container. Upon encountering the initial bullet list item, `Body.bullet`
  2282. calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
  2283. starts up a nested parsing session with `BulletList` as the initial state.
  2284. Only the ``bullet`` transition method is enabled in `BulletList`; as long
  2285. as only bullet list items are encountered, they are parsed and inserted
  2286. into the container. The first construct which is *not* a bullet list item
  2287. triggers the `invalid_input` method, which ends the nested parse and
  2288. closes the container. `BulletList` needs to recognize input that is
  2289. invalid in the context of a bullet list, which means everything *other
  2290. than* bullet list items, so it inherits the transition list created in
  2291. `Body`.
  2292. """
  2293. def invalid_input(self, match=None, context=None, next_state=None):
  2294. """Not a compound element member. Abort this state machine."""
  2295. self.state_machine.previous_line() # back up so parent SM can reassess
  2296. raise EOFError
  2297. indent = invalid_input
  2298. bullet = invalid_input
  2299. enumerator = invalid_input
  2300. field_marker = invalid_input
  2301. option_marker = invalid_input
  2302. doctest = invalid_input
  2303. line_block = invalid_input
  2304. grid_table_top = invalid_input
  2305. simple_table_top = invalid_input
  2306. explicit_markup = invalid_input
  2307. anonymous = invalid_input
  2308. line = invalid_input
  2309. text = invalid_input
  2310. class BulletList(SpecializedBody):
  2311. """Second and subsequent bullet_list list_items."""
  2312. def bullet(self, match, context, next_state):
  2313. """Bullet list item."""
  2314. if match.string[0] != self.parent['bullet']:
  2315. # different bullet: new list
  2316. self.invalid_input()
  2317. listitem, blank_finish = self.list_item(match.end())
  2318. self.parent += listitem
  2319. self.blank_finish = blank_finish
  2320. return [], next_state, []
  2321. class DefinitionList(SpecializedBody):
  2322. """Second and subsequent definition_list_items."""
  2323. def text(self, match, context, next_state):
  2324. """Definition lists."""
  2325. return [match.string], 'Definition', []
  2326. class EnumeratedList(SpecializedBody):
  2327. """Second and subsequent enumerated_list list_items."""
  2328. def enumerator(self, match, context, next_state):
  2329. """Enumerated list item."""
  2330. format, sequence, text, ordinal = self.parse_enumerator(
  2331. match, self.parent['enumtype'])
  2332. if (format != self.format
  2333. or (sequence != '#' and (sequence != self.parent['enumtype']
  2334. or self.auto
  2335. or ordinal != (self.lastordinal + 1)))
  2336. or not self.is_enumerated_list_item(ordinal, sequence, format)):
  2337. # different enumeration: new list
  2338. self.invalid_input()
  2339. if sequence == '#':
  2340. self.auto = 1
  2341. listitem, blank_finish = self.list_item(match.end())
  2342. self.parent += listitem
  2343. self.blank_finish = blank_finish
  2344. self.lastordinal = ordinal
  2345. return [], next_state, []
  2346. class FieldList(SpecializedBody):
  2347. """Second and subsequent field_list fields."""
  2348. def field_marker(self, match, context, next_state):
  2349. """Field list field."""
  2350. field, blank_finish = self.field(match)
  2351. self.parent += field
  2352. self.blank_finish = blank_finish
  2353. return [], next_state, []
  2354. class OptionList(SpecializedBody):
  2355. """Second and subsequent option_list option_list_items."""
  2356. def option_marker(self, match, context, next_state):
  2357. """Option list item."""
  2358. try:
  2359. option_list_item, blank_finish = self.option_list_item(match)
  2360. except MarkupError:
  2361. self.invalid_input()
  2362. self.parent += option_list_item
  2363. self.blank_finish = blank_finish
  2364. return [], next_state, []
  2365. class RFC2822List(SpecializedBody, RFC2822Body):
  2366. """Second and subsequent RFC2822-style field_list fields."""
  2367. patterns = RFC2822Body.patterns
  2368. initial_transitions = RFC2822Body.initial_transitions
  2369. def rfc2822(self, match, context, next_state):
  2370. """RFC2822-style field list item."""
  2371. field, blank_finish = self.rfc2822_field(match)
  2372. self.parent += field
  2373. self.blank_finish = blank_finish
  2374. return [], 'RFC2822List', []
  2375. blank = SpecializedBody.invalid_input
  2376. class ExtensionOptions(FieldList):
  2377. """
  2378. Parse field_list fields for extension options.
  2379. No nested parsing is done (including inline markup parsing).
  2380. """
  2381. def parse_field_body(self, indented, offset, node):
  2382. """Override `Body.parse_field_body` for simpler parsing."""
  2383. lines = []
  2384. for line in list(indented) + ['']:
  2385. if line.strip():
  2386. lines.append(line)
  2387. elif lines:
  2388. text = '\n'.join(lines)
  2389. node += nodes.paragraph(text, text)
  2390. lines = []
  2391. class LineBlock(SpecializedBody):
  2392. """Second and subsequent lines of a line_block."""
  2393. blank = SpecializedBody.invalid_input
  2394. def line_block(self, match, context, next_state):
  2395. """New line of line block."""
  2396. lineno = self.state_machine.abs_line_number()
  2397. line, messages, blank_finish = self.line_block_line(match, lineno)
  2398. self.parent += line
  2399. self.parent.parent += messages
  2400. self.blank_finish = blank_finish
  2401. return [], next_state, []


class Explicit(SpecializedBody):
    """Second and subsequent explicit markup construct."""

    def explicit_markup(self, match, context, next_state):
        """Footnotes, hyperlink targets, directives, comments."""
        nodelist, blank_finish = self.explicit_construct(match)
        self.parent += nodelist
        self.blank_finish = blank_finish
        return [], next_state, []

    def anonymous(self, match, context, next_state):
        """Anonymous hyperlink targets."""
        nodelist, blank_finish = self.anonymous_target(match)
        self.parent += nodelist
        self.blank_finish = blank_finish
        return [], next_state, []

    blank = SpecializedBody.invalid_input


class SubstitutionDef(Body):
    """
    Parser for the contents of a substitution_definition element.
    """

    patterns = {
        'embedded_directive': re.compile(r'(%s)::( +|$)'
                                         % Inliner.simplename),
        'text': r''}
    initial_transitions = ['embedded_directive', 'text']
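
    # Both transitions finish the nested parse by raising EOFError: the body
    # of a substitution definition is consumed in a single step, and the
    # enclosing explicit-markup handler decides what to do with the result.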

    def embedded_directive(self, match, context, next_state):
        nodelist, blank_finish = self.directive(match,
                                                alt=self.parent['names'][0])
        self.parent += nodelist
        if not self.state_machine.at_eof():
            self.blank_finish = blank_finish
        raise EOFError

    def text(self, match, context, next_state):
        if not self.state_machine.at_eof():
            self.blank_finish = self.state_machine.is_next_line_blank()
        raise EOFError


class Text(RSTState):
    """
    Classifier of second line of a text block.

    Could be a paragraph, a definition list item, or a title.
    """

    patterns = {'underline': Body.patterns['line'],
                'text': r''}
    initial_transitions = [('underline', 'Body'), ('text', 'Body')]
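
    # `context` holds the first line of the text block; the transition that
    # fires on the second line decides whether the block is a paragraph
    # (blank), a definition list item (indent), or a section title
    # (underline).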

    def blank(self, match, context, next_state):
        """End of paragraph."""
        # NOTE: self.paragraph returns [node, system_message(s)], literalnext
        paragraph, literalnext = self.paragraph(
            context, self.state_machine.abs_line_number() - 1)
        self.parent += paragraph
        if literalnext:
            self.parent += self.literal_block()
        return [], 'Body', []

    def eof(self, context):
        if context:
            self.blank(None, context, None)
        return []
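
    # A definition list is parsed by handing the remaining input lines to a
    # nested state machine (initial state 'DefinitionList'); `goto_line`
    # then skips this state machine past the lines that were consumed.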
    def indent(self, match, context, next_state):
        """Definition list item."""
        definitionlist = nodes.definition_list()
        definitionlistitem, blank_finish = self.definition_list_item(context)
        definitionlist += definitionlistitem
        self.parent += definitionlist
        offset = self.state_machine.line_offset + 1   # next line
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=definitionlist, initial_state='DefinitionList',
            blank_finish=blank_finish, blank_finish_state='Definition')
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Definition list')
        return [], 'Body', []
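
    # Underline handling: an underline shorter than the title is tolerated
    # (with a warning) only if it is at least 4 characters long; anything
    # shorter is re-dispatched as ordinary text via TransitionCorrection.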
    def underline(self, match, context, next_state):
        """Section title."""
        lineno = self.state_machine.abs_line_number()
        title = context[0].rstrip()
        underline = match.string.rstrip()
        source = title + '\n' + underline
        messages = []
        if column_width(title) > len(underline):
            if len(underline) < 4:
                if self.state_machine.match_titles:
                    msg = self.reporter.info(
                        'Possible title underline, too short for the title.\n'
                        "Treating it as ordinary text because it's so short.",
                        line=lineno)
                    self.parent += msg
                raise statemachine.TransitionCorrection('text')
            else:
                blocktext = context[0] + '\n' + self.state_machine.line
                msg = self.reporter.warning(
                    'Title underline too short.',
                    nodes.literal_block(blocktext, blocktext),
                    line=lineno)
                messages.append(msg)
        if not self.state_machine.match_titles:
            blocktext = context[0] + '\n' + self.state_machine.line
            # We need get_source_and_line() here to report correctly
            src, srcline = self.state_machine.get_source_and_line()
            # TODO: why is abs_line_number() == srcline+1
            # if the error is in a table (try with test_tables.py)?
            # print("get_source_and_line", srcline)
            # print("abs_line_number", self.state_machine.abs_line_number())
            msg = self.reporter.severe(
                'Unexpected section title.',
                nodes.literal_block(blocktext, blocktext),
                source=src, line=srcline)
            self.parent += messages
            self.parent += msg
            return [], next_state, []
        style = underline[0]
        context[:] = []
        self.section(title, source, style, lineno - 1, messages)
        return [], next_state, []

    def text(self, match, context, next_state):
        """Paragraph."""
        startline = self.state_machine.abs_line_number() - 1
        msg = None
        try:
            block = self.state_machine.get_text_block(flush_left=True)
        except statemachine.UnexpectedIndentationError as err:
            block, src, srcline = err.args
            msg = self.reporter.error('Unexpected indentation.',
                                      source=src, line=srcline)
        lines = context + list(block)
        paragraph, literalnext = self.paragraph(lines, startline)
        self.parent += paragraph
        self.parent += msg
        if literalnext:
            try:
                self.state_machine.next_line()
            except EOFError:
                pass
            self.parent += self.literal_block()
        return [], next_state, []
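
    # Called when the preceding paragraph ended with "::" (`literalnext`):
    # an indented block becomes the literal block; if no indented text
    # follows, fall back to a quoted (unindented) literal block.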
    def literal_block(self):
        """Return a list of nodes."""
        (indented, indent, offset, blank_finish
         ) = self.state_machine.get_indented()
        while indented and not indented[-1].strip():
            indented.trim_end()
        if not indented:
            return self.quoted_literal_block()
        data = '\n'.join(indented)
        literal_block = nodes.literal_block(data, data)
        (literal_block.source,
         literal_block.line) = self.state_machine.get_source_and_line(offset+1)
        nodelist = [literal_block]
        if not blank_finish:
            nodelist.append(self.unindent_warning('Literal block'))
        return nodelist
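
    # Quoted literal blocks are parsed by a one-off nested state machine
    # that uses only the special-purpose `QuotedLiteralBlock` state defined
    # at the end of this module.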
    def quoted_literal_block(self):
        abs_line_offset = self.state_machine.abs_line_offset()
        offset = self.state_machine.line_offset
        parent_node = nodes.Element()
        new_abs_offset = self.nested_parse(
            self.state_machine.input_lines[offset:],
            input_offset=abs_line_offset, node=parent_node, match_titles=False,
            state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
                                  'initial_state': 'QuotedLiteralBlock'})
        self.goto_line(new_abs_offset)
        return parent_node.children

    def definition_list_item(self, termline):
        (indented, indent, line_offset, blank_finish
         ) = self.state_machine.get_indented()
        itemnode = nodes.definition_list_item(
            '\n'.join(termline + list(indented)))
        lineno = self.state_machine.abs_line_number() - 1
        (itemnode.source,
         itemnode.line) = self.state_machine.get_source_and_line(lineno)
        termlist, messages = self.term(termline, lineno)
        itemnode += termlist
        definition = nodes.definition('', *messages)
        itemnode += definition
        if termline[0][-2:] == '::':
            definition += self.reporter.info(
                'Blank line missing before literal block (after the "::")? '
                'Interpreted as a definition list item.',
                line=lineno+1)
        self.nested_parse(indented, input_offset=line_offset, node=definition)
        return itemnode, blank_finish
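
    # Classifiers are separated from the term (and from each other) by
    # " : " (space, colon, space); `term` splits the term line's Text nodes
    # on this delimiter.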
    classifier_delimiter = re.compile(' +: +')

    def term(self, lines, lineno):
        """Return a definition_list's term and optional classifiers."""
        assert len(lines) == 1
        text_nodes, messages = self.inline_text(lines[0], lineno)
        term_node = nodes.term(lines[0])
        (term_node.source,
         term_node.line) = self.state_machine.get_source_and_line(lineno)
        node_list = [term_node]
        for i in range(len(text_nodes)):
            node = text_nodes[i]
            if isinstance(node, nodes.Text):
                parts = self.classifier_delimiter.split(node)
                if len(parts) == 1:
                    node_list[-1] += node
                else:
                    text = parts[0].rstrip()
                    textnode = nodes.Text(text)
                    node_list[-1] += textnode
                    for part in parts[1:]:
                        node_list.append(
                            nodes.classifier(unescape(part, True), part))
            else:
                node_list[-1] += node
        return node_list, messages


class SpecializedText(Text):
    """
    Superclass for second and subsequent lines of Text-variants.

    All transition methods are disabled. Override individual methods in
    subclasses to re-enable.
    """

    def eof(self, context):
        """Incomplete construct."""
        return []

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        raise EOFError

    blank = invalid_input
    indent = invalid_input
    underline = invalid_input
    text = invalid_input


class Definition(SpecializedText):
    """Second line of potential definition_list_item."""

    def eof(self, context):
        """Not a definition."""
        self.state_machine.previous_line(2)  # so parent SM can reassess
        return []

    def indent(self, match, context, next_state):
        """Definition list item."""
        itemnode, blank_finish = self.definition_list_item(context)
        self.parent += itemnode
        self.blank_finish = blank_finish
        return [], 'DefinitionList', []


class Line(SpecializedText):
    """
    Second line of over- & underlined section title or transition marker.
    """

    eofcheck = 1                        # @@@ ???
    """Set to 0 while parsing sections, so that we don't catch the EOF."""

    def eof(self, context):
        """Transition marker at end of section or document."""
        marker = context[0].strip()
        if self.memo.section_bubble_up_kludge:
            self.memo.section_bubble_up_kludge = False
        elif len(marker) < 4:
            self.state_correction(context)
        if self.eofcheck:               # ignore EOFError with sections
            src, srcline = self.state_machine.get_source_and_line()
            # lineno = self.state_machine.abs_line_number() - 1
            transition = nodes.transition(rawsource=context[0])
            transition.source = src
            transition.line = srcline - 1
            # transition.line = lineno
            self.parent += transition
        self.eofcheck = 1
        return []

    def blank(self, match, context, next_state):
        """Transition marker."""
        src, srcline = self.state_machine.get_source_and_line()
        marker = context[0].strip()
        if len(marker) < 4:
            self.state_correction(context)
        transition = nodes.transition(rawsource=marker)
        transition.source = src
        transition.line = srcline - 1
        self.parent += transition
        return [], 'Body', []

    def text(self, match, context, next_state):
        """Potential over- & underlined title."""
        lineno = self.state_machine.abs_line_number() - 1
        overline = context[0]
        title = match.string
        underline = ''
        try:
            underline = self.state_machine.next_line()
        except EOFError:
            blocktext = overline + '\n' + title
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Incomplete section title.',
                    nodes.literal_block(blocktext, blocktext),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        source = '%s\n%s\n%s' % (overline, title, underline)
        overline = overline.rstrip()
        underline = underline.rstrip()
        if not self.transitions['underline'][0].match(underline):
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Missing matching underline for section title overline.',
                    nodes.literal_block(source, source),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        elif overline != underline:
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Title overline & underline mismatch.',
                    nodes.literal_block(source, source),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        title = title.rstrip()
        messages = []
        if column_width(title) > len(overline):
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.warning(
                    'Title overline too short.',
                    nodes.literal_block(source, source),
                    line=lineno)
                messages.append(msg)
        style = (overline[0], underline[0])
        self.eofcheck = 0               # @@@ not sure this is correct
        self.section(title.lstrip(), source, style, lineno + 1, messages)
        self.eofcheck = 1
        return [], 'Body', []

    indent = text                       # indented title

    def underline(self, match, context, next_state):
        overline = context[0]
        blocktext = overline + '\n' + self.state_machine.line
        lineno = self.state_machine.abs_line_number() - 1
        if len(overline.rstrip()) < 4:
            self.short_overline(context, blocktext, lineno, 1)
        msg = self.reporter.error(
            'Invalid section title or transition marker.',
            nodes.literal_block(blocktext, blocktext),
            line=lineno)
        self.parent += msg
        return [], 'Body', []

    def short_overline(self, context, blocktext, lineno, lines=1):
        msg = self.reporter.info(
            'Possible incomplete section title.\nTreating the overline as '
            "ordinary text because it's so short.",
            line=lineno)
        self.parent += msg
        self.state_correction(context, lines)

    def state_correction(self, context, lines=1):
        self.state_machine.previous_line(lines)
        context[:] = []
        raise statemachine.StateCorrection('Body', 'text')
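

# Illustrative reST input handled by `QuotedLiteralBlock` (example only, not
# part of the original source)::
#
#     John Doe wrote::
#
#     >> Great idea!
#     >
#     > Why didn't I think of that?
#
# Every line of the quoted block must begin with the same punctuation
# character as its first line.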


class QuotedLiteralBlock(RSTState):
    """
    Nested parse handler for quoted (unindented) literal blocks.

    Special-purpose.  Not for inclusion in `state_classes`.
    """

    patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
                'text': r''}
    initial_transitions = ('initial_quoted', 'text')

    def __init__(self, state_machine, debug=False):
        RSTState.__init__(self, state_machine, debug)
        self.messages = []
        self.initial_lineno = None

    def blank(self, match, context, next_state):
        if context:
            raise EOFError
        else:
            return context, next_state, []

    def eof(self, context):
        if context:
            src, srcline = self.state_machine.get_source_and_line(
                self.initial_lineno)
            text = '\n'.join(context)
            literal_block = nodes.literal_block(text, text)
            literal_block.source = src
            literal_block.line = srcline
            self.parent += literal_block
        else:
            self.parent += self.reporter.warning(
                'Literal block expected; none found.',
                line=self.state_machine.abs_line_number()
                )  # src not available, statemachine.input_lines is empty
            self.state_machine.previous_line()
        self.parent += self.messages
        return []

    def indent(self, match, context, next_state):
        assert context, ('QuotedLiteralBlock.indent: context should not '
                         'be empty!')
        self.messages.append(
            self.reporter.error('Unexpected indentation.',
                                line=self.state_machine.abs_line_number()))
        self.state_machine.previous_line()
        raise EOFError

    def initial_quoted(self, match, context, next_state):
        """Match arbitrary quote character on the first line only."""
        self.remove_transition('initial_quoted')
        quote = match.string[0]
        pattern = re.compile(re.escape(quote))
        # New transition matches consistent quotes only:
        self.add_transition('quoted',
                            (pattern, self.quoted, self.__class__.__name__))
        self.initial_lineno = self.state_machine.abs_line_number()
        return [match.string], next_state, []

    def quoted(self, match, context, next_state):
        """Match consistent quotes on subsequent lines."""
        context.append(match.string)
        return context, next_state, []

    def text(self, match, context, next_state):
        if context:
            self.messages.append(
                self.reporter.error('Inconsistent literal block quoting.',
                                    line=self.state_machine.abs_line_number()))
            self.state_machine.previous_line()
        raise EOFError


state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
                 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
                 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
"""Standard set of State classes used to start `RSTStateMachine`."""