config.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. import re
  2. # Use Unicode characters instead of their ascii pseudo-replacements
  3. UNICODE_SNOB = False
  4. # Marker to use for marking tables for padding post processing
  5. TABLE_MARKER_FOR_PAD = "special_marker_for_table_padding"
  6. # Escape all special characters. Output is less readable, but avoids
  7. # corner case formatting issues.
  8. ESCAPE_SNOB = False
  9. # Put the links after each paragraph instead of at the end.
  10. LINKS_EACH_PARAGRAPH = False
  11. # Wrap long lines at position. 0 for no wrapping.
  12. BODY_WIDTH = 78
  13. # Don't show internal links (href="#local-anchor") -- corresponding link
  14. # targets won't be visible in the plain text file anyway.
  15. SKIP_INTERNAL_LINKS = True
  16. # Use inline, rather than reference, formatting for images and links
  17. INLINE_LINKS = True
  18. # Protect links from line breaks surrounding them with angle brackets (in
  19. # addition to their square brackets)
  20. PROTECT_LINKS = False
  21. # WRAP_LINKS = True
  22. WRAP_LINKS = True
  23. # Wrap list items.
  24. WRAP_LIST_ITEMS = False
  25. # Number of pixels Google indents nested lists
  26. GOOGLE_LIST_INDENT = 36
  27. # Values Google and others may use to indicate bold text
  28. BOLD_TEXT_STYLE_VALUES = ("bold", "700", "800", "900")
  29. IGNORE_ANCHORS = False
  30. IGNORE_IMAGES = False
  31. IMAGES_AS_HTML = False
  32. IMAGES_TO_ALT = False
  33. IMAGES_WITH_SIZE = False
  34. IGNORE_EMPHASIS = False
  35. MARK_CODE = False
  36. DECODE_ERRORS = "strict"
  37. DEFAULT_IMAGE_ALT = ""
  38. PAD_TABLES = False
  39. # Convert links with same href and text to <href> format
  40. # if they are absolute links
  41. USE_AUTOMATIC_LINKS = True
  42. # For checking space-only lines on line 771
  43. RE_SPACE = re.compile(r"\s\+")
  44. RE_ORDERED_LIST_MATCHER = re.compile(r"\d+\.\s")
  45. RE_UNORDERED_LIST_MATCHER = re.compile(r"[-\*\+]\s")
  46. RE_MD_CHARS_MATCHER = re.compile(r"([\\\[\]\(\)])")
  47. RE_MD_CHARS_MATCHER_ALL = re.compile(r"([`\*_{}\[\]\(\)#!])")
  48. # to find links in the text
  49. RE_LINK = re.compile(r"(\[.*?\] ?\(.*?\))|(\[.*?\]:.*?)")
  50. RE_MD_DOT_MATCHER = re.compile(
  51. r"""
  52. ^ # start of line
  53. (\s*\d+) # optional whitespace and a number
  54. (\.) # dot
  55. (?=\s) # lookahead assert whitespace
  56. """,
  57. re.MULTILINE | re.VERBOSE,
  58. )
  59. RE_MD_PLUS_MATCHER = re.compile(
  60. r"""
  61. ^
  62. (\s*)
  63. (\+)
  64. (?=\s)
  65. """,
  66. flags=re.MULTILINE | re.VERBOSE,
  67. )
  68. RE_MD_DASH_MATCHER = re.compile(
  69. r"""
  70. ^
  71. (\s*)
  72. (-)
  73. (?=\s|\-) # followed by whitespace (bullet list, or spaced out hr)
  74. # or another dash (header or hr)
  75. """,
  76. flags=re.MULTILINE | re.VERBOSE,
  77. )
  78. RE_SLASH_CHARS = r"\`*_{}[]()#+-.!"
  79. RE_MD_BACKSLASH_MATCHER = re.compile(
  80. r"""
  81. (\\) # match one slash
  82. (?=[%s]) # followed by a char that requires escaping
  83. """
  84. % re.escape(RE_SLASH_CHARS),
  85. flags=re.VERBOSE,
  86. )
  87. UNIFIABLE = {
  88. "rsquo": "'",
  89. "lsquo": "'",
  90. "rdquo": '"',
  91. "ldquo": '"',
  92. "copy": "(C)",
  93. "mdash": "--",
  94. "nbsp": " ",
  95. "rarr": "->",
  96. "larr": "<-",
  97. "middot": "*",
  98. "ndash": "-",
  99. "oelig": "oe",
  100. "aelig": "ae",
  101. "agrave": "a",
  102. "aacute": "a",
  103. "acirc": "a",
  104. "atilde": "a",
  105. "auml": "a",
  106. "aring": "a",
  107. "egrave": "e",
  108. "eacute": "e",
  109. "ecirc": "e",
  110. "euml": "e",
  111. "igrave": "i",
  112. "iacute": "i",
  113. "icirc": "i",
  114. "iuml": "i",
  115. "ograve": "o",
  116. "oacute": "o",
  117. "ocirc": "o",
  118. "otilde": "o",
  119. "ouml": "o",
  120. "ugrave": "u",
  121. "uacute": "u",
  122. "ucirc": "u",
  123. "uuml": "u",
  124. "lrm": "",
  125. "rlm": "",
  126. }
  127. # Format tables in HTML rather than Markdown syntax
  128. BYPASS_TABLES = False
  129. # Ignore table-related tags (table, th, td, tr) while keeping rows
  130. IGNORE_TABLES = False
  131. # Use a single line break after a block element rather than two line breaks.
  132. # NOTE: Requires body width setting to be 0.
  133. SINGLE_LINE_BREAK = False
  134. # Use double quotation marks when converting the <q> tag.
  135. OPEN_QUOTE = '"'
  136. CLOSE_QUOTE = '"'