123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318 |
'use strict'

// The module's sole export is the tokenizer factory defined below.
module.exports = factory
// Construct a tokenizer.  This creates both `tokenizeInline` and
// `tokenizeBlock`.
//
// `type` is the prefix used to look up the tokenizer configuration on the
// object the returned function is invoked on (`this`): the ordered list of
// names is read from `this[type + 'Methods']` and the implementations from
// `this[type + 'Tokenizers']`.
//
// Returns `tokenize(value, location)`, which must be called with that object
// as `this`.
function factory(type) {
  return tokenize

  // Tokenizer for a bound `type`.
  //
  // `value` is the string to tokenize and `location` is the `{line, column}`
  // at which `value` starts.
  // Returns the list of top-level nodes added through `eat`.
  // Besides `<type>Methods` / `<type>Tokenizers`, the following members of
  // `this` are read: `offset`, `file`, `toOffset`, `atStart`, `inList`,
  // `inBlock`, `inLink` and `exitStart`.  `this.eof` is written once all of
  // `value` is consumed.
  function tokenize(value, location) {
    var self = this
    var offset = self.offset
    var tokens = []
    var methods = self[type + 'Methods']
    var tokenizers = self[type + 'Tokenizers']
    var line = location.line
    var column = location.column
    var index
    var length
    var method
    var name
    var matched
    var valueLength

    // Nothing to tokenize: exit early (note that `eof` is not set here).
    if (!value) {
      return tokens
    }

    // Expose on `eat`.
    eat.now = now
    eat.file = self.file

    // Sync initial offset.
    updatePosition('')

    // Iterate over `value`, and iterate over all tokenizers.  When one eats
    // something, re-iterate with the remaining value.  If no tokenizer eats,
    // something failed (should not happen) and an exception is thrown.
    while (value) {
      index = -1
      length = methods.length
      matched = false

      while (++index < length) {
        name = methods[index]
        method = tokenizers[name]

        // Previously, we had constructs such as footnotes and YAML that used
        // these properties.
        // Those are now external (plus there are userland extensions), that
        // may still use them.
        if (
          method &&
          /* istanbul ignore next */ (!method.onlyAtStart || self.atStart) &&
          /* istanbul ignore next */ (!method.notInList || !self.inList) &&
          /* istanbul ignore next */ (!method.notInBlock || !self.inBlock) &&
          (!method.notInLink || !self.inLink)
        ) {
          valueLength = value.length

          method.call(self, eat, value)

          // A tokenizer "matched" when `eat` shortened the remaining value.
          matched = valueLength !== value.length

          if (matched) {
            break
          }
        }
      }

      // `fail` is relied upon to throw; that is what ends this loop when
      // nothing was eaten.
      /* istanbul ignore if */
      if (!matched) {
        self.file.fail(new Error('Infinite loop'), eat.now())
      }
    }

    self.eof = now()

    return tokens

    // Update line, column, and offset based on `subvalue`.
    function updatePosition(subvalue) {
      var lastIndex = -1
      var index = subvalue.indexOf('\n')

      // Count the line feeds and remember where the last one is.
      while (index !== -1) {
        line++
        lastIndex = index
        index = subvalue.indexOf('\n', index + 1)
      }

      if (lastIndex === -1) {
        // Same line: advance the column.
        column += subvalue.length
      } else {
        // New line: the column restarts after the last line feed.
        column = subvalue.length - lastIndex
      }

      // Take the per-line offset registered for the current line into
      // account.
      if (line in offset) {
        if (lastIndex !== -1) {
          column += offset[line]
        } else if (column <= offset[line]) {
          column = offset[line] + 1
        }
      }
    }

    // Get offset.  Called before the first character is eaten to retrieve the
    // range’s offsets.
    function getOffset() {
      var indentation = []
      var pos = line + 1

      // Done.  Called when the last character is eaten to retrieve the
      // range’s offsets: one entry per line entered since `getOffset` was
      // called.
      return function () {
        var last = line + 1

        while (pos < last) {
          indentation.push((offset[pos] || 0) + 1)

          pos++
        }

        return indentation
      }
    }

    // Get the current position as `{line, column, offset}`; `offset` is
    // computed by `self.toOffset`.
    function now() {
      var pos = {line: line, column: column}

      pos.offset = self.toOffset(pos)

      return pos
    }

    // Store position information for a node: the given `start` and, as
    // `end`, the position at the time of construction.
    function Position(start) {
      this.start = start
      this.end = now()
    }

    // Throw when a value is incorrectly eaten.  This shouldn’t happen but
    // will throw on new, incorrect rules.
    function validateEat(subvalue) {
      /* istanbul ignore if */
      if (value.slice(0, subvalue.length) !== subvalue) {
        // Capture stack-trace.
        self.file.fail(
          new Error(
            'Incorrectly eaten value: please report this warning on https://git.io/vg5Ft'
          ),
          now()
        )
      }
    }

    // Mark position and patch `node.position`.
    function position() {
      var before = now()

      return update

      // Add the position to a node.  `indent` is optional; without it (and
      // without a previous position) the node gets an empty `indent`.
      function update(node, indent) {
        var previous = node.position
        var start = previous ? previous.start : before
        var combined = []
        var gapLine = previous && previous.end.line
        var startLine = before.line

        node.position = new Position(start)

        // If there was already a `position`, this node was merged.  Fixing
        // `start` wasn’t hard, but the indent is different.  Especially
        // because some information, the indent between the end of the
        // previous position and the line of `before`, wasn’t tracked.
        // Luckily, that space is (should be?) empty, so we can safely check
        // for it now.
        if (previous && indent && previous.indent) {
          combined = previous.indent

          if (gapLine < startLine) {
            while (++gapLine < startLine) {
              combined.push((offset[gapLine] || 0) + 1)
            }

            combined.push(before.column)
          }

          indent = combined.concat(indent)
        }

        node.position.indent = indent || []

        return node
      }
    }

    // Add `node` to `parent`s children or to `tokens`.  Performs merges
    // where possible.  Returns the node that ended up in the list, which is
    // the previous sibling when a merge happened.
    function add(node, parent) {
      var children = parent ? parent.children : tokens
      var previous = children[children.length - 1]
      var fn

      if (
        previous &&
        node.type === previous.type &&
        (node.type === 'text' || node.type === 'blockquote') &&
        mergeable(previous) &&
        mergeable(node)
      ) {
        fn = node.type === 'text' ? mergeText : mergeBlockquote
        node = fn.call(self, previous, node)
      }

      // A merge returns `previous`, which is already in `children`.
      if (node !== previous) {
        children.push(node)
      }

      if (self.atStart && tokens.length !== 0) {
        self.exitStart()
      }

      return node
    }

    // Remove `subvalue` from `value`.  `subvalue` must be at the start of
    // `value`.  Returns `apply`, which also carries `reset` and `test`.
    function eat(subvalue) {
      var indent = getOffset()
      var pos = position()
      var current = now()

      validateEat(subvalue)

      apply.reset = reset
      reset.test = test
      apply.test = test

      value = value.slice(subvalue.length)

      updatePosition(subvalue)

      indent = indent()

      return apply

      // Add the given arguments, add `position` to the returned node, and
      // return the node.
      function apply(node, parent) {
        return pos(add(pos(node), parent), indent)
      }

      // Functions just like apply, but resets the content: the line and
      // column are reversed, and the eaten value is re-added.  This is
      // useful for nodes with a single type of content, such as lists and
      // tables.  See `apply` above for what parameters are expected.
      function reset() {
        var node = apply.apply(null, arguments)

        line = current.line
        column = current.column
        value = subvalue + value

        return node
      }

      // Test the position, after eating, and reverse to a not-eaten state.
      // Returns the position the eaten value would have occupied.
      function test() {
        var result = pos({})

        line = current.line
        column = current.column
        value = subvalue + value

        return result.position
      }
    }
  }
}
// Check whether a node is mergeable with adjacent nodes.
//
// Every node that is not a `text` node, and every `text` node without a
// `position`, is reported as mergeable.  A positioned `text` node is
// mergeable when it spans several lines, or when the number of columns it
// covers equals the length of its `value`.
function mergeable(node) {
  var start
  var end

  if (node.type !== 'text' || !node.position) {
    return true
  }

  start = node.position.start
  end = node.position.end

  // Only merge nodes which occupy the same size as their `value`.
  return (
    start.line !== end.line || end.column - start.column === node.value.length
  )
}
// Merge two text nodes: `node` into `previous`.
//
// Appends `node.value` to `previous.value` in place and returns `previous`.
function mergeText(previous, node) {
  previous.value += node.value

  return previous
}
// Merge two blockquotes: `node` into `previous`, unless in CommonMark or gfm
// modes.
//
// Must be called with a `this` that has an `options` object.  When
// `options.commonmark` or `options.gfm` is truthy, nothing is merged and
// `node` is returned.  Otherwise `previous.children` is replaced by a new
// list holding the children of both nodes, and `previous` is returned.
function mergeBlockquote(previous, node) {
  if (this.options.commonmark || this.options.gfm) {
    return node
  }

  previous.children = previous.children.concat(node.children)

  return previous
}
|