// index.js (15 KB)
  1. 'use strict'
  2. // Do a two-pass walk, first to get the list of packages that need to be
  3. // bundled, then again to get the actual files and folders.
  4. // Keep a cache of node_modules content and package.json data, so that the
  5. // second walk doesn't have to re-do all the same work.
  6. const bundleWalk = require('npm-bundled')
  7. const BundleWalker = bundleWalk.BundleWalker
  8. const BundleWalkerSync = bundleWalk.BundleWalkerSync
  9. const ignoreWalk = require('ignore-walk')
  10. const IgnoreWalker = ignoreWalk.Walker
  11. const IgnoreWalkerSync = ignoreWalk.WalkerSync
  12. const rootBuiltinRules = Symbol('root-builtin-rules')
  13. const packageNecessaryRules = Symbol('package-necessary-rules')
  14. const path = require('path')
  15. const normalizePackageBin = require('npm-normalize-package-bin')
  16. // Weird side-effect of this: a readme (etc) file will be included
  17. // if it exists anywhere within a folder with a package.json file.
  18. // The original intent was only to include these files in the root,
  19. // but now users in the wild are dependent on that behavior for
  20. // localized documentation and other use cases. Adding a `/` to
  21. // these rules, while tempting and arguably more "correct", is a
  22. // significant change that will break existing use cases.
  23. const packageMustHaveFileNames = 'readme|copying|license|licence'
  24. const packageMustHaves = `@(${packageMustHaveFileNames}){,.*[^~$]}`
  25. const packageMustHavesRE = new RegExp(`^(${packageMustHaveFileNames})(\\..*[^~$])?$`, 'i')
  26. const fs = require('fs')
  27. const glob = require('glob')
  28. const defaultRules = [
  29. '.npmignore',
  30. '.gitignore',
  31. '**/.git',
  32. '**/.svn',
  33. '**/.hg',
  34. '**/CVS',
  35. '**/.git/**',
  36. '**/.svn/**',
  37. '**/.hg/**',
  38. '**/CVS/**',
  39. '/.lock-wscript',
  40. '/.wafpickle-*',
  41. '/build/config.gypi',
  42. 'npm-debug.log',
  43. '**/.npmrc',
  44. '.*.swp',
  45. '.DS_Store',
  46. '**/.DS_Store/**',
  47. '._*',
  48. '**/._*/**',
  49. '*.orig',
  50. '/package-lock.json',
  51. '/yarn.lock',
  52. '/archived-packages/**',
  53. ]
  54. // There may be others, but :?|<> are handled by node-tar
  55. const nameIsBadForWindows = file => /\*/.test(file)
  56. // a decorator that applies our custom rules to an ignore walker
  57. const npmWalker = Class => class Walker extends Class {
  58. constructor (opt) {
  59. opt = opt || {}
  60. // the order in which rules are applied.
  61. opt.ignoreFiles = [
  62. rootBuiltinRules,
  63. 'package.json',
  64. '.npmignore',
  65. '.gitignore',
  66. packageNecessaryRules,
  67. ]
  68. opt.includeEmpty = false
  69. opt.path = opt.path || process.cwd()
  70. // only follow links in the root node_modules folder, because if those
  71. // folders are included, it's because they're bundled, and bundles
  72. // should include the contents, not the symlinks themselves.
  73. // This regexp tests to see that we're either a node_modules folder,
  74. // or a @scope within a node_modules folder, in the root's node_modules
  75. // hierarchy (ie, not in test/foo/node_modules/ or something).
  76. const followRe = /^(?:\/node_modules\/(?:@[^/]+\/[^/]+|[^/]+)\/)*\/node_modules(?:\/@[^/]+)?$/
  77. const rootPath = opt.parent ? opt.parent.root : opt.path
  78. const followTestPath = opt.path.replace(/\\/g, '/').substr(rootPath.length)
  79. opt.follow = followRe.test(followTestPath)
  80. super(opt)
  81. // ignore a bunch of things by default at the root level.
  82. // also ignore anything in the main project node_modules hierarchy,
  83. // except bundled dependencies
  84. if (!this.parent) {
  85. this.bundled = opt.bundled || []
  86. this.bundledScopes = Array.from(new Set(
  87. this.bundled.filter(f => /^@/.test(f))
  88. .map(f => f.split('/')[0])))
  89. const rules = defaultRules.join('\n') + '\n'
  90. this.packageJsonCache = opt.packageJsonCache || new Map()
  91. super.onReadIgnoreFile(rootBuiltinRules, rules, _ => _)
  92. } else {
  93. this.bundled = []
  94. this.bundledScopes = []
  95. this.packageJsonCache = this.parent.packageJsonCache
  96. }
  97. }
  98. onReaddir (entries) {
  99. if (!this.parent) {
  100. entries = entries.filter(e =>
  101. e !== '.git' &&
  102. !(e === 'node_modules' && this.bundled.length === 0)
  103. )
  104. }
  105. // if we have a package.json, then look in it for 'files'
  106. // we _only_ do this in the root project, not bundled deps
  107. // or other random folders. Bundled deps are always assumed
  108. // to be in the state the user wants to include them, and
  109. // a package.json somewhere else might be a template or
  110. // test or something else entirely.
  111. if (this.parent || !entries.includes('package.json'))
  112. return super.onReaddir(entries)
  113. // when the cache has been seeded with the root manifest,
  114. // we must respect that (it may differ from the filesystem)
  115. const ig = path.resolve(this.path, 'package.json')
  116. if (this.packageJsonCache.has(ig)) {
  117. const pkg = this.packageJsonCache.get(ig)
  118. // fall back to filesystem when seeded manifest is invalid
  119. if (!pkg || typeof pkg !== 'object')
  120. return this.readPackageJson(entries)
  121. // feels wonky, but this ensures package bin is _always_
  122. // normalized, as well as guarding against invalid JSON
  123. return this.getPackageFiles(entries, JSON.stringify(pkg))
  124. }
  125. this.readPackageJson(entries)
  126. }
  127. onReadPackageJson (entries, er, pkg) {
  128. if (er)
  129. this.emit('error', er)
  130. else
  131. this.getPackageFiles(entries, pkg)
  132. }
  133. mustHaveFilesFromPackage (pkg) {
  134. const files = []
  135. if (pkg.browser)
  136. files.push('/' + pkg.browser)
  137. if (pkg.main)
  138. files.push('/' + pkg.main)
  139. if (pkg.bin) {
  140. // always an object because normalized already
  141. for (const key in pkg.bin)
  142. files.push('/' + pkg.bin[key])
  143. }
  144. files.push(
  145. '/package.json',
  146. '/npm-shrinkwrap.json',
  147. '!/package-lock.json',
  148. packageMustHaves
  149. )
  150. return files
  151. }
  152. getPackageFiles (entries, pkg) {
  153. try {
  154. // XXX this could be changed to use read-package-json-fast
  155. // which handles the normalizing of bins for us, and simplifies
  156. // the test for bundleDependencies and bundledDependencies later.
  157. // HOWEVER if we do this, we need to be sure that we're careful
  158. // about what we write back out since rpj-fast removes some fields
  159. // that the user likely wants to keep. it also would add a second
  160. // file read that we would want to optimize away.
  161. pkg = normalizePackageBin(JSON.parse(pkg.toString()))
  162. } catch (er) {
  163. // not actually a valid package.json
  164. return super.onReaddir(entries)
  165. }
  166. const ig = path.resolve(this.path, 'package.json')
  167. this.packageJsonCache.set(ig, pkg)
  168. // no files list, just return the normal readdir() result
  169. if (!Array.isArray(pkg.files))
  170. return super.onReaddir(entries)
  171. pkg.files.push(...this.mustHaveFilesFromPackage(pkg))
  172. // If the package has a files list, then it's unlikely to include
  173. // node_modules, because why would you do that? but since we use
  174. // the files list as the effective readdir result, that means it
  175. // looks like we don't have a node_modules folder at all unless we
  176. // include it here.
  177. if ((pkg.bundleDependencies || pkg.bundledDependencies) && entries.includes('node_modules'))
  178. pkg.files.push('node_modules')
  179. const patterns = Array.from(new Set(pkg.files)).reduce((set, pattern) => {
  180. const excl = pattern.match(/^!+/)
  181. if (excl)
  182. pattern = pattern.substr(excl[0].length)
  183. // strip off any / from the start of the pattern. /foo => foo
  184. pattern = pattern.replace(/^\/+/, '')
  185. // an odd number of ! means a negated pattern. !!foo ==> foo
  186. const negate = excl && excl[0].length % 2 === 1
  187. set.push({ pattern, negate })
  188. return set
  189. }, [])
  190. let n = patterns.length
  191. const set = new Set()
  192. const negates = new Set()
  193. const results = []
  194. const then = (pattern, negate, er, fileList, i) => {
  195. if (er)
  196. return this.emit('error', er)
  197. results[i] = { negate, fileList }
  198. if (--n === 0)
  199. processResults(results)
  200. }
  201. const processResults = results => {
  202. for (const {negate, fileList} of results) {
  203. if (negate) {
  204. fileList.forEach(f => {
  205. f = f.replace(/\/+$/, '')
  206. set.delete(f)
  207. negates.add(f)
  208. })
  209. } else {
  210. fileList.forEach(f => {
  211. f = f.replace(/\/+$/, '')
  212. set.add(f)
  213. negates.delete(f)
  214. })
  215. }
  216. }
  217. const list = Array.from(set)
  218. // replace the files array with our computed explicit set
  219. pkg.files = list.concat(Array.from(negates).map(f => '!' + f))
  220. const rdResult = Array.from(new Set(
  221. list.map(f => f.replace(/^\/+/, ''))
  222. ))
  223. super.onReaddir(rdResult)
  224. }
  225. // maintain the index so that we process them in-order only once all
  226. // are completed, otherwise the parallelism messes things up, since a
  227. // glob like **/*.js will always be slower than a subsequent !foo.js
  228. patterns.forEach(({pattern, negate}, i) =>
  229. this.globFiles(pattern, (er, res) => then(pattern, negate, er, res, i)))
  230. }
  231. filterEntry (entry, partial) {
  232. // get the partial path from the root of the walk
  233. const p = this.path.substr(this.root.length + 1)
  234. const pkgre = /^node_modules\/(@[^/]+\/?[^/]+|[^/]+)(\/.*)?$/
  235. const isRoot = !this.parent
  236. const pkg = isRoot && pkgre.test(entry) ?
  237. entry.replace(pkgre, '$1') : null
  238. const rootNM = isRoot && entry === 'node_modules'
  239. const rootPJ = isRoot && entry === 'package.json'
  240. return (
  241. // if we're in a bundled package, check with the parent.
  242. /^node_modules($|\/)/i.test(p) ? this.parent.filterEntry(
  243. this.basename + '/' + entry, partial)
  244. // if package is bundled, all files included
  245. // also include @scope dirs for bundled scoped deps
  246. // they'll be ignored if no files end up in them.
  247. // However, this only matters if we're in the root.
  248. // node_modules folders elsewhere, like lib/node_modules,
  249. // should be included normally unless ignored.
  250. : pkg ? this.bundled.indexOf(pkg) !== -1 ||
  251. this.bundledScopes.indexOf(pkg) !== -1
  252. // only walk top node_modules if we want to bundle something
  253. : rootNM ? !!this.bundled.length
  254. // always include package.json at the root.
  255. : rootPJ ? true
  256. // always include readmes etc in any included dir
  257. : packageMustHavesRE.test(entry) ? true
  258. // npm-shrinkwrap and package.json always included in the root pkg
  259. : isRoot && (entry === 'npm-shrinkwrap.json' || entry === 'package.json')
  260. ? true
  261. // package-lock never included
  262. : isRoot && entry === 'package-lock.json' ? false
  263. // otherwise, follow ignore-walk's logic
  264. : super.filterEntry(entry, partial)
  265. )
  266. }
  267. filterEntries () {
  268. if (this.ignoreRules['.npmignore'])
  269. this.ignoreRules['.gitignore'] = null
  270. this.filterEntries = super.filterEntries
  271. super.filterEntries()
  272. }
  273. addIgnoreFile (file, then) {
  274. const ig = path.resolve(this.path, file)
  275. if (file === 'package.json' && this.parent)
  276. then()
  277. else if (this.packageJsonCache.has(ig))
  278. this.onPackageJson(ig, this.packageJsonCache.get(ig), then)
  279. else
  280. super.addIgnoreFile(file, then)
  281. }
  282. onPackageJson (ig, pkg, then) {
  283. this.packageJsonCache.set(ig, pkg)
  284. if (Array.isArray(pkg.files)) {
  285. // in this case we already included all the must-haves
  286. super.onReadIgnoreFile('package.json', pkg.files.map(
  287. f => '!' + f
  288. ).join('\n') + '\n', then)
  289. } else {
  290. // if there's a bin, browser or main, make sure we don't ignore it
  291. // also, don't ignore the package.json itself, or any files that
  292. // must be included in the package.
  293. const rules = this.mustHaveFilesFromPackage(pkg).map(f => `!${f}`)
  294. const data = rules.join('\n') + '\n'
  295. super.onReadIgnoreFile(packageNecessaryRules, data, then)
  296. }
  297. }
  298. // override parent stat function to completely skip any filenames
  299. // that will break windows entirely.
  300. // XXX(isaacs) Next major version should make this an error instead.
  301. stat (entry, file, dir, then) {
  302. if (nameIsBadForWindows(entry))
  303. then()
  304. else
  305. super.stat(entry, file, dir, then)
  306. }
  307. // override parent onstat function to nix all symlinks
  308. onstat (st, entry, file, dir, then) {
  309. if (st.isSymbolicLink())
  310. then()
  311. else
  312. super.onstat(st, entry, file, dir, then)
  313. }
  314. onReadIgnoreFile (file, data, then) {
  315. if (file === 'package.json') {
  316. try {
  317. const ig = path.resolve(this.path, file)
  318. this.onPackageJson(ig, JSON.parse(data), then)
  319. } catch (er) {
  320. // ignore package.json files that are not json
  321. then()
  322. }
  323. } else
  324. super.onReadIgnoreFile(file, data, then)
  325. }
  326. sort (a, b) {
  327. return sort(a, b)
  328. }
  329. }
  330. class Walker extends npmWalker(IgnoreWalker) {
  331. globFiles (pattern, cb) {
  332. glob(pattern, { dot: true, cwd: this.path, nocase: true }, cb)
  333. }
  334. readPackageJson (entries) {
  335. fs.readFile(this.path + '/package.json', (er, pkg) =>
  336. this.onReadPackageJson(entries, er, pkg))
  337. }
  338. walker (entry, then) {
  339. new Walker(this.walkerOpt(entry)).on('done', then).start()
  340. }
  341. }
  342. class WalkerSync extends npmWalker(IgnoreWalkerSync) {
  343. globFiles (pattern, cb) {
  344. cb(null, glob.sync(pattern, { dot: true, cwd: this.path, nocase: true }))
  345. }
  346. readPackageJson (entries) {
  347. const p = this.path + '/package.json'
  348. try {
  349. this.onReadPackageJson(entries, null, fs.readFileSync(p))
  350. } catch (er) {
  351. this.onReadPackageJson(entries, er)
  352. }
  353. }
  354. walker (entry, then) {
  355. new WalkerSync(this.walkerOpt(entry)).start()
  356. then()
  357. }
  358. }
  359. const walk = (options, callback) => {
  360. options = options || {}
  361. const p = new Promise((resolve, reject) => {
  362. const bw = new BundleWalker(options)
  363. bw.on('done', bundled => {
  364. options.bundled = bundled
  365. options.packageJsonCache = bw.packageJsonCache
  366. new Walker(options).on('done', resolve).on('error', reject).start()
  367. })
  368. bw.start()
  369. })
  370. return callback ? p.then(res => callback(null, res), callback) : p
  371. }
  372. const walkSync = options => {
  373. options = options || {}
  374. const bw = new BundleWalkerSync(options).start()
  375. options.bundled = bw.result
  376. options.packageJsonCache = bw.packageJsonCache
  377. const walker = new WalkerSync(options)
  378. walker.start()
  379. return walker.result
  380. }
  381. // optimize for compressibility
  382. // extname, then basename, then locale alphabetically
  383. // https://twitter.com/isntitvacant/status/1131094910923231232
  384. const sort = (a, b) => {
  385. const exta = path.extname(a).toLowerCase()
  386. const extb = path.extname(b).toLowerCase()
  387. const basea = path.basename(a).toLowerCase()
  388. const baseb = path.basename(b).toLowerCase()
  389. return exta.localeCompare(extb, 'en') ||
  390. basea.localeCompare(baseb, 'en') ||
  391. a.localeCompare(b, 'en')
  392. }
  393. module.exports = walk
  394. walk.sync = walkSync
  395. walk.Walker = Walker
  396. walk.WalkerSync = WalkerSync