tokenizer.js 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
  1. 'use strict'
  2. module.exports = factory
  /**
   * Construct a tokenizer bound to `type`. This is used to create both
   * `tokenizeInline` and `tokenizeBlock`.
   *
   * The returned function must be called with the parser as `this`: it reads
   * `this[type + 'Methods']` (ordered tokenizer names),
   * `this[type + 'Tokenizers']` (name to function), `this.offset`, `this.file`
   * and `this.toOffset`.
   *
   * @param {string} type - Prefix used to look up the methods and tokenizers.
   * @returns {Function} `tokenize(value, location)`.
   */
  function factory(type) {
    return tokenize

    /**
     * Tokenize `value` by repeatedly offering it to every enabled tokenizer
     * until nothing is left.
     *
     * @param {string} value - Content to tokenize.
     * @param {Object} location - Initial `line` and `column`.
     * @returns {Array} The nodes added without a parent.
     */
    function tokenize(value, location) {
      var self = this
      var offset = self.offset
      var tokens = []
      var methods = self[type + 'Methods']
      var tokenizers = self[type + 'Tokenizers']
      var line = location.line
      var column = location.column
      var cursor
      var total
      var tokenizer
      var remaining
      var matched

      // Nothing to tokenize: return early (note that `eof` is not set here).
      if (!value) {
        return tokens
      }

      // Expose on `eat`.
      eat.now = now
      eat.file = self.file

      // Sync the initial column with the offset registered for this line.
      updatePosition('')

      // Offer `value` to each tokenizer in order. As soon as one eats
      // something, start over with what is left. If a full pass eats nothing,
      // something failed (should not happen) and an exception is thrown.
      while (value) {
        matched = false

        for (cursor = 0, total = methods.length; cursor < total; cursor++) {
          tokenizer = tokenizers[methods[cursor]]

          // Previously, there were constructs such as footnotes and YAML that
          // used these properties. Those are now external (plus there are
          // userland extensions), which may still use them.
          if (
            !tokenizer ||
            /* istanbul ignore next */ (tokenizer.onlyAtStart && !self.atStart) ||
            /* istanbul ignore next */ (tokenizer.notInList && self.inList) ||
            /* istanbul ignore next */ (tokenizer.notInBlock && self.inBlock) ||
            (tokenizer.notInLink && self.inLink)
          ) {
            continue
          }

          remaining = value.length
          tokenizer.call(self, eat, value)

          // A tokenizer matched when it shortened `value` (through `eat`).
          if (value.length !== remaining) {
            matched = true
            break
          }
        }

        /* istanbul ignore if */
        if (!matched) {
          self.file.fail(new Error('Infinite loop'), eat.now())
        }
      }

      self.eof = now()

      return tokens

      // Advance `line` and `column` over `subvalue`, taking the per-line
      // `offset` into account.
      function updatePosition(subvalue) {
        var lastBreak = -1
        var at = subvalue.indexOf('\n')
        var crossed

        for (; at !== -1; at = subvalue.indexOf('\n', at + 1)) {
          line++
          lastBreak = at
        }

        crossed = lastBreak !== -1
        column = crossed ? subvalue.length - lastBreak : column + subvalue.length

        if (!(line in offset)) {
          return
        }

        if (crossed) {
          column += offset[line]
        } else if (column <= offset[line]) {
          column = offset[line] + 1
        }
      }

      // Start collecting offsets. Called before the first character is eaten;
      // the returned function is called after the last character is eaten and
      // yields one column per line that was entered in between.
      function getOffset() {
        var indentation = []
        var next = line + 1

        return function () {
          var stop = line + 1

          for (; next < stop; next++) {
            indentation.push((offset[next] || 0) + 1)
          }

          return indentation
        }
      }

      // Get the current position as `{line, column, offset}`.
      function now() {
        var point = {line: line, column: column}

        // `toOffset` receives the point before `offset` is set on it.
        point.offset = self.toOffset(point)

        return point
      }

      // Position of a node: from `start` up to the current position.
      function Position(start) {
        this.start = start
        this.end = now()
      }

      // Fail when `subvalue` is not at the start of `value`. This should not
      // happen, but guards against new, incorrect rules.
      function validateEat(subvalue) {
        if (value.slice(0, subvalue.length) === subvalue) {
          return
        }

        /* istanbul ignore next */
        // Capture stack-trace.
        self.file.fail(
          new Error(
            'Incorrectly eaten value: please report this warning on https://git.io/vg5Ft'
          ),
          now()
        )
      }

      // Remember the current position and return a function that patches
      // `node.position` with it.
      function position() {
        var before = now()

        return update

        // Set `position` (and its `indent`) on `node`.
        function update(node, indent) {
          var existing = node.position
          var resumeLine = existing && existing.end.line
          var markLine = before.line
          var indents = indent
          var carried

          node.position = new Position(existing ? existing.start : before)

          // If there was already a `position`, this node was merged. Keeping
          // `start` is easy, but the indent between the old end line and the
          // line where this eat started was not tracked. That space is
          // (should be?) empty, so it is reconstructed from `offset` here.
          if (existing && indents && existing.indent) {
            carried = existing.indent

            if (resumeLine < markLine) {
              for (resumeLine++; resumeLine < markLine; resumeLine++) {
                carried.push((offset[resumeLine] || 0) + 1)
              }

              carried.push(before.column)
            }

            indents = carried.concat(indents)
          }

          node.position.indent = indents || []

          return node
        }
      }

      // Add `node` to `parent`s children, or to `tokens` when there is no
      // parent. Adjacent text and blockquote nodes are merged where possible.
      function add(node, parent) {
        var siblings = parent ? parent.children : tokens
        var tail = siblings[siblings.length - 1]
        var result = node
        var merge

        if (tail && node.type === tail.type) {
          if (node.type === 'text') {
            merge = mergeText
          } else if (node.type === 'blockquote') {
            merge = mergeBlockquote
          }

          if (merge && mergeable(tail) && mergeable(node)) {
            result = merge.call(self, tail, node)
          }
        }

        // A merge may hand back `tail`, which is already in the list.
        if (result !== tail) {
          siblings.push(result)
        }

        if (self.atStart && tokens.length !== 0) {
          self.exitStart()
        }

        return result
      }

      // Remove `subvalue` from the start of `value` and return a function
      // (`apply`) that adds a positioned node for it. `apply.reset` and
      // `apply.test` (also available as `apply.reset.test`) undo the eat.
      function eat(subvalue) {
        var finish = getOffset()
        var pos = position()
        var current = now()
        var indent

        validateEat(subvalue)

        apply.reset = reset
        reset.test = test
        apply.test = test

        value = value.slice(subvalue.length)

        updatePosition(subvalue)

        indent = finish()

        return apply

        // Add `node` (to `parent`, if given), patch its `position`, and
        // return the resulting (possibly merged) node.
        function apply(node, parent) {
          var positioned = pos(node)
          var added = add(positioned, parent)

          return pos(added, indent)
        }

        // Functions just like `apply`, but afterwards restores the line and
        // column and puts the eaten value back. This is useful for nodes with
        // a single type of content, such as lists and tables.
        function reset() {
          var node = apply.apply(null, arguments)

          rewind()

          return node
        }

        // Get the position the eaten value would have, then restore the
        // not-eaten state.
        function test() {
          var probe = pos({})

          rewind()

          return probe.position
        }

        // Restore the state from before this eat.
        function rewind() {
          line = current.line
          column = current.column
          value = subvalue + value
        }
      }
    }
  }
  /**
   * Check whether a node is mergeable with adjacent nodes.
   *
   * Anything that is not a positioned text node is considered mergeable. A
   * positioned text node on a single line is only mergeable when it occupies
   * exactly as many columns as its `value` has characters.
   *
   * @param {Object} node - Node to check.
   * @returns {boolean} Whether `node` can be merged.
   */
  function mergeable(node) {
    var span = node.position

    if (node.type !== 'text' || !span) {
      return true
    }

    // Text spanning several lines cannot be measured by columns: allow it.
    if (span.start.line !== span.end.line) {
      return true
    }

    // Only merge nodes which occupy the same size as their `value`.
    return span.end.column - span.start.column === node.value.length
  }
  /**
   * Merge two text nodes: append the value of `node` to `previous`.
   *
   * @param {Object} previous - Text node that receives the value (mutated).
   * @param {Object} node - Text node whose value is appended.
   * @returns {Object} `previous`.
   */
  function mergeText(previous, node) {
    previous.value = previous.value + node.value

    return previous
  }
  /**
   * Merge two blockquotes: move the children of `node` into `previous`,
   * unless in CommonMark or gfm modes, in which case nothing is merged.
   *
   * Must be called with an object exposing `options` as `this`.
   *
   * @param {Object} previous - Blockquote that receives the children.
   * @param {Object} node - Blockquote whose children are appended.
   * @returns {Object} `node` when not merging, `previous` otherwise.
   */
  function mergeBlockquote(previous, node) {
    var settings = this.options
    var keepSeparate = settings.commonmark || settings.gfm

    if (keepSeparate) {
      return node
    }

    // `concat` creates a new list: the old `children` array is left untouched.
    previous.children = previous.children.concat(node.children)

    return previous
  }
  251. }