123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210 |
'use strict'

// Helpers used by the tokenizer below.
var ccount = require('ccount')
var decode = require('parse-entities')
var decimal = require('is-decimal')
var alphabetical = require('is-alphabetical')
var whitespace = require('is-whitespace-character')
var locate = require('../locate/url')

// The tokenizer is the module's export; `locator` and `notInLink` are
// attached to it as properties.
// NOTE(review): `notInLink` presumably tells the parser to skip this tokenizer
// while inside a link — confirm against the parser that consumes it.
module.exports = url
url.locator = locate
url.notInLink = true

// Character codes, compared against `String#charCodeAt` results.
var exclamationMark = 33 // '!'
var ampersand = 38 // '&'
var rightParenthesis = 41 // ')'
var asterisk = 42 // '*'
var comma = 44 // ','
var dash = 45 // '-'
var dot = 46 // '.'
var colon = 58 // ':'
var semicolon = 59 // ';'
var lessThan = 60 // '<'
var questionMark = 63 // '?'
var underscore = 95 // '_'
var tilde = 126 // '~'

// Single-character strings, used when counting and searching parentheses.
var leftParenthesisCharacter = '('
var rightParenthesisCharacter = ')'
/**
 * Tokenize a GFM literal autolink: a bare URL that starts with `www.`,
 * `http://`, or `https://`.
 *
 * Must be invoked with the parser as `this`: it reads `options.gfm` and
 * `inlineTokenizers`, and calls `enterLink()` and `tokenizeInline()`.
 *
 * @param {Function} eat - Called as `eat(content)(node)` to consume the
 *   matched text; `eat.now()` supplies the position handed to the child
 *   tokenizer.
 * @param {string} value - Input to match; the URL must begin at index 0.
 * @param {boolean} [silent] - When truthy, return `true` as soon as a valid
 *   domain is found instead of building a node.
 * @returns {*} `undefined` when `options.gfm` is falsy or `value` does not
 *   start with a valid literal URL, `true` in silent mode, otherwise the
 *   result of `eat(content)(node)` for the `link` node.
 */
function url(eat, value, silent) {
  var self = this
  var gfm = self.options.gfm
  var tokenizers = self.inlineTokenizers
  var length = value.length
  var previousDot = -1
  var protocolless = false
  var dots
  var lastTwoPartsStart
  var start
  var index
  var pathStart
  var path
  var code
  var end
  var leftCount
  var rightCount
  var content
  var children
  var href
  var exit

  if (!gfm) {
    return
  }

  // `WWW.` doesn’t work: the `www.` prefix is matched case-sensitively, the
  // protocols are not.
  if (value.slice(0, 4) === 'www.') {
    protocolless = true
    index = 4
  } else if (value.slice(0, 7).toLowerCase() === 'http://') {
    index = 7
  } else if (value.slice(0, 8).toLowerCase() === 'https://') {
    index = 8
  } else {
    return
  }

  // Act as if the starting boundary is a dot, so that a dot directly after
  // the prefix is rejected like any other doubled dot.
  previousDot = index - 1

  // Parse a valid domain.
  start = index
  dots = []

  while (index < length) {
    code = value.charCodeAt(index)

    if (code === dot) {
      // Dots may not appear after each other.
      if (previousDot === index - 1) {
        break
      }

      dots.push(index)
      previousDot = index
      index++
      continue
    }

    if (
      decimal(code) ||
      alphabetical(code) ||
      code === dash ||
      code === underscore
    ) {
      index++
      continue
    }

    break
  }

  // Ignore a final dot: either the domain ran to the end of the input on a
  // dot, or the loop stopped on the second of two adjacent dots.
  if (code === dot) {
    dots.pop()
    index--
  }

  // If there are no dots, exit.
  if (dots[0] === undefined) {
    return
  }

  // If there is an underscore in the last two domain parts, exit:
  // `www.example.c_m` and `www.ex_ample.com` are not OK, but
  // `www.sub_domain.example.com` is.
  lastTwoPartsStart = dots.length < 2 ? start : dots[dots.length - 2] + 1

  if (value.slice(lastTwoPartsStart, index).indexOf('_') !== -1) {
    return
  }

  /* istanbul ignore if - never used (yet) */
  if (silent) {
    return true
  }

  end = index
  pathStart = index

  // Parse a path: it runs up to whitespace or `<`.  `end` only advances past
  // characters that are not trailing punctuation, so a run of such
  // punctuation at the very end is left out of the URL.
  while (index < length) {
    code = value.charCodeAt(index)

    if (whitespace(code) || code === lessThan) {
      break
    }

    index++

    if (
      code === exclamationMark ||
      code === asterisk ||
      code === comma ||
      code === dot ||
      code === colon ||
      code === questionMark ||
      code === underscore ||
      code === tilde
    ) {
      // Possibly trailing punctuation: do not move `end` yet.
    } else {
      end = index
    }
  }

  index = end

  // If the path ends in a closing paren, and the count of closing parens is
  // higher than the opening count, then remove the superfluous closing parens.
  if (value.charCodeAt(index - 1) === rightParenthesis) {
    path = value.slice(pathStart, index)
    leftCount = ccount(path, leftParenthesisCharacter)
    rightCount = ccount(path, rightParenthesisCharacter)

    while (rightCount > leftCount) {
      index = pathStart + path.lastIndexOf(rightParenthesisCharacter)
      path = value.slice(pathStart, index)
      rightCount--
    }
  }

  if (value.charCodeAt(index - 1) === semicolon) {
    // GitHub doesn’t document this, but final semicolons aren’t part of the
    // URL either.
    index--

    // If the path ends in what looks like an entity (`&` + letters + `;`),
    // it’s not part of the path.
    if (alphabetical(value.charCodeAt(index - 1))) {
      end = index - 2

      while (alphabetical(value.charCodeAt(end))) {
        end--
      }

      if (value.charCodeAt(end) === ampersand) {
        index = end
      }
    }
  }

  content = value.slice(0, index)
  href = decode(content, {nonTerminated: false})

  if (protocolless) {
    href = 'http://' + href
  }

  exit = self.enterLink()

  try {
    // Temporarily remove all tokenizers except text in url.
    self.inlineTokenizers = {text: tokenizers.text}
    children = self.tokenizeInline(content, eat.now())
  } finally {
    // Restore parser state even when tokenizing the children throws, so a
    // failure here cannot leave the parser with only the text tokenizer or
    // with the link state still entered.
    self.inlineTokenizers = tokenizers
    exit()
  }

  return eat(content)({
    type: 'link',
    title: null,
    url: href,
    children: children
  })
}
|