1
0

tables.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513
  1. # $Id: tables.py 9032 2022-03-05 23:29:06Z milde $
  2. # Authors: David Goodger <goodger@python.org>; David Priest
  3. # Copyright: This module has been placed in the public domain.
  4. """
  5. Directives for table elements.
  6. """
  7. __docformat__ = 'reStructuredText'
  8. import csv
  9. import os.path
  10. import warnings
  11. from docutils import io, nodes, statemachine, utils
  12. from docutils.utils import SystemMessagePropagation
  13. from docutils.parsers.rst import Directive
  14. from docutils.parsers.rst import directives
  15. def align(argument):
  16. return directives.choice(argument, ('left', 'center', 'right'))
  17. class Table(Directive):
  18. """
  19. Generic table base class.
  20. """
  21. optional_arguments = 1
  22. final_argument_whitespace = True
  23. option_spec = {'class': directives.class_option,
  24. 'name': directives.unchanged,
  25. 'align': align,
  26. 'width': directives.length_or_percentage_or_unitless,
  27. 'widths': directives.value_or(('auto', 'grid'),
  28. directives.positive_int_list)}
  29. has_content = True
  30. def make_title(self):
  31. if self.arguments:
  32. title_text = self.arguments[0]
  33. text_nodes, messages = self.state.inline_text(title_text,
  34. self.lineno)
  35. title = nodes.title(title_text, '', *text_nodes)
  36. (title.source,
  37. title.line) = self.state_machine.get_source_and_line(self.lineno)
  38. else:
  39. title = None
  40. messages = []
  41. return title, messages
  42. def process_header_option(self):
  43. source = self.state_machine.get_source(self.lineno - 1)
  44. table_head = []
  45. max_header_cols = 0
  46. if 'header' in self.options: # separate table header in option
  47. rows, max_header_cols = self.parse_csv_data_into_rows(
  48. self.options['header'].split('\n'), self.HeaderDialect(),
  49. source)
  50. table_head.extend(rows)
  51. return table_head, max_header_cols
  52. def check_table_dimensions(self, rows, header_rows, stub_columns):
  53. if len(rows) < header_rows:
  54. error = self.reporter.error('%s header row(s) specified but '
  55. 'only %s row(s) of data supplied ("%s" directive).'
  56. % (header_rows, len(rows), self.name),
  57. nodes.literal_block(self.block_text, self.block_text),
  58. line=self.lineno)
  59. raise SystemMessagePropagation(error)
  60. if len(rows) == header_rows > 0:
  61. error = self.reporter.error(
  62. f'Insufficient data supplied ({len(rows)} row(s)); '
  63. 'no data remaining for table body, '
  64. f'required by "{self.name}" directive.',
  65. nodes.literal_block(self.block_text, self.block_text),
  66. line=self.lineno)
  67. raise SystemMessagePropagation(error)
  68. for row in rows:
  69. if len(row) < stub_columns:
  70. error = self.reporter.error(
  71. f'{stub_columns} stub column(s) specified '
  72. f'but only {len(row)} columns(s) of data supplied '
  73. f'("{self.name}" directive).',
  74. nodes.literal_block(self.block_text, self.block_text),
  75. line=self.lineno)
  76. raise SystemMessagePropagation(error)
  77. if len(row) == stub_columns > 0:
  78. error = self.reporter.error(
  79. 'Insufficient data supplied (%s columns(s)); '
  80. 'no data remaining for table body, required '
  81. 'by "%s" directive.' % (len(row), self.name),
  82. nodes.literal_block(self.block_text, self.block_text),
  83. line=self.lineno)
  84. raise SystemMessagePropagation(error)
  85. def set_table_width(self, table_node):
  86. if 'width' in self.options:
  87. table_node['width'] = self.options.get('width')
  88. @property
  89. def widths(self):
  90. return self.options.get('widths', '')
  91. def get_column_widths(self, n_cols):
  92. if isinstance(self.widths, list):
  93. if len(self.widths) != n_cols:
  94. # TODO: use last value for missing columns?
  95. error = self.reporter.error('"%s" widths do not match the '
  96. 'number of columns in table (%s).' % (self.name, n_cols),
  97. nodes.literal_block(self.block_text, self.block_text),
  98. line=self.lineno)
  99. raise SystemMessagePropagation(error)
  100. col_widths = self.widths
  101. elif n_cols:
  102. col_widths = [100 // n_cols] * n_cols
  103. else:
  104. error = self.reporter.error('No table data detected in CSV file.',
  105. nodes.literal_block(self.block_text, self.block_text),
  106. line=self.lineno)
  107. raise SystemMessagePropagation(error)
  108. return col_widths
  109. def extend_short_rows_with_empty_cells(self, columns, parts):
  110. for part in parts:
  111. for row in part:
  112. if len(row) < columns:
  113. row.extend([(0, 0, 0, [])] * (columns - len(row)))
  114. class RSTTable(Table):
  115. def run(self):
  116. if not self.content:
  117. warning = self.reporter.warning('Content block expected '
  118. 'for the "%s" directive; none found.' % self.name,
  119. nodes.literal_block(self.block_text, self.block_text),
  120. line=self.lineno)
  121. return [warning]
  122. title, messages = self.make_title()
  123. node = nodes.Element() # anonymous container for parsing
  124. self.state.nested_parse(self.content, self.content_offset, node)
  125. if len(node) != 1 or not isinstance(node[0], nodes.table):
  126. error = self.reporter.error('Error parsing content block for the '
  127. '"%s" directive: exactly one table expected.' % self.name,
  128. nodes.literal_block(self.block_text, self.block_text),
  129. line=self.lineno)
  130. return [error]
  131. table_node = node[0]
  132. table_node['classes'] += self.options.get('class', [])
  133. self.set_table_width(table_node)
  134. if 'align' in self.options:
  135. table_node['align'] = self.options.get('align')
  136. if isinstance(self.widths, list):
  137. tgroup = table_node[0]
  138. try:
  139. col_widths = self.get_column_widths(tgroup["cols"])
  140. except SystemMessagePropagation as detail:
  141. return [detail.args[0]]
  142. colspecs = [child for child in tgroup.children
  143. if child.tagname == 'colspec']
  144. for colspec, col_width in zip(colspecs, col_widths):
  145. colspec['colwidth'] = col_width
  146. if self.widths == 'auto':
  147. table_node['classes'] += ['colwidths-auto']
  148. elif self.widths: # "grid" or list of integers
  149. table_node['classes'] += ['colwidths-given']
  150. self.add_name(table_node)
  151. if title:
  152. table_node.insert(0, title)
  153. return [table_node] + messages
  154. class CSVTable(Table):
  155. option_spec = {'header-rows': directives.nonnegative_int,
  156. 'stub-columns': directives.nonnegative_int,
  157. 'header': directives.unchanged,
  158. 'width': directives.length_or_percentage_or_unitless,
  159. 'widths': directives.value_or(('auto', ),
  160. directives.positive_int_list),
  161. 'file': directives.path,
  162. 'url': directives.uri,
  163. 'encoding': directives.encoding,
  164. 'class': directives.class_option,
  165. 'name': directives.unchanged,
  166. 'align': align,
  167. # field delimiter char
  168. 'delim': directives.single_char_or_whitespace_or_unicode,
  169. # treat whitespace after delimiter as significant
  170. 'keepspace': directives.flag,
  171. # text field quote/unquote char:
  172. 'quote': directives.single_char_or_unicode,
  173. # char used to escape delim & quote as-needed:
  174. 'escape': directives.single_char_or_unicode}
  175. class DocutilsDialect(csv.Dialect):
  176. """CSV dialect for `csv_table` directive."""
  177. delimiter = ','
  178. quotechar = '"'
  179. doublequote = True
  180. skipinitialspace = True
  181. strict = True
  182. lineterminator = '\n'
  183. quoting = csv.QUOTE_MINIMAL
  184. def __init__(self, options):
  185. if 'delim' in options:
  186. self.delimiter = options['delim']
  187. if 'keepspace' in options:
  188. self.skipinitialspace = False
  189. if 'quote' in options:
  190. self.quotechar = options['quote']
  191. if 'escape' in options:
  192. self.doublequote = False
  193. self.escapechar = options['escape']
  194. csv.Dialect.__init__(self)
  195. class HeaderDialect(csv.Dialect):
  196. """CSV dialect to use for the "header" option data."""
  197. delimiter = ','
  198. quotechar = '"'
  199. escapechar = '\\'
  200. doublequote = False
  201. skipinitialspace = True
  202. strict = True
  203. lineterminator = '\n'
  204. quoting = csv.QUOTE_MINIMAL
  205. def check_requirements(self):
  206. pass
  207. def run(self):
  208. try:
  209. if (not self.state.document.settings.file_insertion_enabled
  210. and ('file' in self.options
  211. or 'url' in self.options)):
  212. warning = self.reporter.warning('File and URL access '
  213. 'deactivated; ignoring "%s" directive.' % self.name,
  214. nodes.literal_block(self.block_text, self.block_text),
  215. line=self.lineno)
  216. return [warning]
  217. self.check_requirements()
  218. title, messages = self.make_title()
  219. csv_data, source = self.get_csv_data()
  220. table_head, max_header_cols = self.process_header_option()
  221. rows, max_cols = self.parse_csv_data_into_rows(
  222. csv_data, self.DocutilsDialect(self.options), source)
  223. max_cols = max(max_cols, max_header_cols)
  224. header_rows = self.options.get('header-rows', 0)
  225. stub_columns = self.options.get('stub-columns', 0)
  226. self.check_table_dimensions(rows, header_rows, stub_columns)
  227. table_head.extend(rows[:header_rows])
  228. table_body = rows[header_rows:]
  229. col_widths = self.get_column_widths(max_cols)
  230. self.extend_short_rows_with_empty_cells(max_cols,
  231. (table_head, table_body))
  232. except SystemMessagePropagation as detail:
  233. return [detail.args[0]]
  234. except csv.Error as detail:
  235. message = str(detail)
  236. error = self.reporter.error('Error with CSV data'
  237. ' in "%s" directive:\n%s' % (self.name, message),
  238. nodes.literal_block(self.block_text, self.block_text),
  239. line=self.lineno)
  240. return [error]
  241. table = (col_widths, table_head, table_body)
  242. table_node = self.state.build_table(table, self.content_offset,
  243. stub_columns, widths=self.widths)
  244. table_node['classes'] += self.options.get('class', [])
  245. if 'align' in self.options:
  246. table_node['align'] = self.options.get('align')
  247. self.set_table_width(table_node)
  248. self.add_name(table_node)
  249. if title:
  250. table_node.insert(0, title)
  251. return [table_node] + messages
  252. def get_csv_data(self):
  253. """
  254. Get CSV data from the directive content, from an external
  255. file, or from a URL reference.
  256. """
  257. encoding = self.options.get(
  258. 'encoding', self.state.document.settings.input_encoding)
  259. error_handler = self.state.document.settings.input_encoding_error_handler # noqa:E501
  260. if self.content:
  261. # CSV data is from directive content.
  262. if 'file' in self.options or 'url' in self.options:
  263. error = self.reporter.error('"%s" directive may not both '
  264. 'specify an external file and have content.' % self.name,
  265. nodes.literal_block(self.block_text, self.block_text),
  266. line=self.lineno)
  267. raise SystemMessagePropagation(error)
  268. source = self.content.source(0)
  269. csv_data = self.content
  270. elif 'file' in self.options:
  271. # CSV data is from an external file.
  272. if 'url' in self.options:
  273. error = self.reporter.error('The "file" and "url" options '
  274. 'may not be simultaneously specified '
  275. 'for the "%s" directive.' % self.name,
  276. nodes.literal_block(self.block_text, self.block_text),
  277. line=self.lineno)
  278. raise SystemMessagePropagation(error)
  279. source_dir = os.path.dirname(
  280. os.path.abspath(self.state.document.current_source))
  281. source = os.path.normpath(os.path.join(source_dir,
  282. self.options['file']))
  283. source = utils.relative_path(None, source)
  284. try:
  285. csv_file = io.FileInput(source_path=source,
  286. encoding=encoding,
  287. error_handler=error_handler)
  288. csv_data = csv_file.read().splitlines()
  289. except OSError as error:
  290. severe = self.reporter.severe(
  291. 'Problems with "%s" directive path:\n%s.'
  292. % (self.name, error),
  293. nodes.literal_block(self.block_text, self.block_text),
  294. line=self.lineno)
  295. raise SystemMessagePropagation(severe)
  296. else:
  297. self.state.document.settings.record_dependencies.add(source)
  298. elif 'url' in self.options:
  299. # CSV data is from a URL.
  300. # Do not import urllib at the top of the module because
  301. # it may fail due to broken SSL dependencies, and it takes
  302. # about 0.15 seconds to load. Update: < 0.03s with Py3k.
  303. from urllib.request import urlopen
  304. from urllib.error import URLError
  305. source = self.options['url']
  306. try:
  307. csv_text = urlopen(source).read()
  308. except (URLError, OSError, ValueError) as error:
  309. severe = self.reporter.severe(
  310. 'Problems with "%s" directive URL "%s":\n%s.'
  311. % (self.name, self.options['url'], error),
  312. nodes.literal_block(self.block_text, self.block_text),
  313. line=self.lineno)
  314. raise SystemMessagePropagation(severe)
  315. csv_file = io.StringInput(
  316. source=csv_text, source_path=source, encoding=encoding,
  317. error_handler=(self.state.document.settings.
  318. input_encoding_error_handler))
  319. csv_data = csv_file.read().splitlines()
  320. else:
  321. error = self.reporter.warning(
  322. 'The "%s" directive requires content; none supplied.'
  323. % self.name,
  324. nodes.literal_block(self.block_text, self.block_text),
  325. line=self.lineno)
  326. raise SystemMessagePropagation(error)
  327. return csv_data, source
  328. @staticmethod
  329. def decode_from_csv(s):
  330. warnings.warn('CSVTable.decode_from_csv()'
  331. ' is not required with Python 3'
  332. ' and will be removed in Docutils 0.21 or later.',
  333. DeprecationWarning, stacklevel=2)
  334. return s
  335. @staticmethod
  336. def encode_for_csv(s):
  337. warnings.warn('CSVTable.encode_from_csv()'
  338. ' is not required with Python 3'
  339. ' and will be removed in Docutils 0.21 or later.',
  340. DeprecationWarning, stacklevel=2)
  341. return s
  342. def parse_csv_data_into_rows(self, csv_data, dialect, source):
  343. csv_reader = csv.reader([line + '\n' for line in csv_data],
  344. dialect=dialect)
  345. rows = []
  346. max_cols = 0
  347. for row in csv_reader:
  348. row_data = []
  349. for cell in row:
  350. cell_data = (0, 0, 0, statemachine.StringList(
  351. cell.splitlines(), source=source))
  352. row_data.append(cell_data)
  353. rows.append(row_data)
  354. max_cols = max(max_cols, len(row))
  355. return rows, max_cols
  356. class ListTable(Table):
  357. """
  358. Implement tables whose data is encoded as a uniform two-level bullet list.
  359. For further ideas, see
  360. https://docutils.sourceforge.io/docs/dev/rst/alternatives.html#list-driven-tables
  361. """
  362. option_spec = {'header-rows': directives.nonnegative_int,
  363. 'stub-columns': directives.nonnegative_int,
  364. 'width': directives.length_or_percentage_or_unitless,
  365. 'widths': directives.value_or(('auto', ),
  366. directives.positive_int_list),
  367. 'class': directives.class_option,
  368. 'name': directives.unchanged,
  369. 'align': align}
  370. def run(self):
  371. if not self.content:
  372. error = self.reporter.error('The "%s" directive is empty; '
  373. 'content required.' % self.name,
  374. nodes.literal_block(self.block_text, self.block_text),
  375. line=self.lineno)
  376. return [error]
  377. title, messages = self.make_title()
  378. node = nodes.Element() # anonymous container for parsing
  379. self.state.nested_parse(self.content, self.content_offset, node)
  380. try:
  381. num_cols, col_widths = self.check_list_content(node)
  382. table_data = [[item.children for item in row_list[0]]
  383. for row_list in node[0]]
  384. header_rows = self.options.get('header-rows', 0)
  385. stub_columns = self.options.get('stub-columns', 0)
  386. self.check_table_dimensions(table_data, header_rows, stub_columns)
  387. except SystemMessagePropagation as detail:
  388. return [detail.args[0]]
  389. table_node = self.build_table_from_list(table_data, col_widths,
  390. header_rows, stub_columns)
  391. if 'align' in self.options:
  392. table_node['align'] = self.options.get('align')
  393. table_node['classes'] += self.options.get('class', [])
  394. self.set_table_width(table_node)
  395. self.add_name(table_node)
  396. if title:
  397. table_node.insert(0, title)
  398. return [table_node] + messages
  399. def check_list_content(self, node):
  400. if len(node) != 1 or not isinstance(node[0], nodes.bullet_list):
  401. error = self.reporter.error(
  402. 'Error parsing content block for the "%s" directive: '
  403. 'exactly one bullet list expected.' % self.name,
  404. nodes.literal_block(self.block_text, self.block_text),
  405. line=self.lineno)
  406. raise SystemMessagePropagation(error)
  407. list_node = node[0]
  408. num_cols = 0
  409. # Check for a uniform two-level bullet list:
  410. for item_index in range(len(list_node)):
  411. item = list_node[item_index]
  412. if len(item) != 1 or not isinstance(item[0], nodes.bullet_list):
  413. error = self.reporter.error(
  414. 'Error parsing content block for the "%s" directive: '
  415. 'two-level bullet list expected, but row %s does not '
  416. 'contain a second-level bullet list.'
  417. % (self.name, item_index + 1),
  418. nodes.literal_block(self.block_text, self.block_text),
  419. line=self.lineno)
  420. raise SystemMessagePropagation(error)
  421. elif item_index:
  422. if len(item[0]) != num_cols:
  423. error = self.reporter.error(
  424. 'Error parsing content block for the "%s" directive: '
  425. 'uniform two-level bullet list expected, but row %s '
  426. 'does not contain the same number of items as row 1 '
  427. '(%s vs %s).'
  428. % (self.name, item_index + 1, len(item[0]), num_cols),
  429. nodes.literal_block(self.block_text, self.block_text),
  430. line=self.lineno)
  431. raise SystemMessagePropagation(error)
  432. else:
  433. num_cols = len(item[0])
  434. col_widths = self.get_column_widths(num_cols)
  435. return num_cols, col_widths
  436. def build_table_from_list(self, table_data,
  437. col_widths, header_rows, stub_columns):
  438. table = nodes.table()
  439. if self.widths == 'auto':
  440. table['classes'] += ['colwidths-auto']
  441. elif self.widths: # explicitly set column widths
  442. table['classes'] += ['colwidths-given']
  443. tgroup = nodes.tgroup(cols=len(col_widths))
  444. table += tgroup
  445. for col_width in col_widths:
  446. colspec = nodes.colspec()
  447. if col_width is not None:
  448. colspec.attributes['colwidth'] = col_width
  449. if stub_columns:
  450. colspec.attributes['stub'] = 1
  451. stub_columns -= 1
  452. tgroup += colspec
  453. rows = []
  454. for row in table_data:
  455. row_node = nodes.row()
  456. for cell in row:
  457. entry = nodes.entry()
  458. entry += cell
  459. row_node += entry
  460. rows.append(row_node)
  461. if header_rows:
  462. thead = nodes.thead()
  463. thead.extend(rows[:header_rows])
  464. tgroup += thead
  465. tbody = nodes.tbody()
  466. tbody.extend(rows[header_rows:])
  467. tgroup += tbody
  468. return table