// fetcher.js
//
// This is the base class that the other fetcher types in lib
// all descend from.
// It handles the unpacking and retry logic that is shared among
// all of the other Fetcher types.
  // Dependencies.
  const npa = require('npm-package-arg')
  const ssri = require('ssri')
  const { promisify } = require('util')
  const { basename, dirname } = require('path')
  // promise-returning wrapper around rimraf
  const rimraf = promisify(require('rimraf'))
  const tar = require('tar')
  const procLog = require('./util/proc-log.js')
  const retry = require('promise-retry')
  const fsm = require('fs-minipass')
  const cacache = require('cacache')
  const isPackageBin = require('./util/is-package-bin.js')
  const getContents = require('@npmcli/installed-package-contents')

  // we only change ownership on unix platforms, and only if uid is 0.
  // selfOwner is { uid: 0, gid: <process gid> } when process.getuid exists
  // and reports 0, and null in every other case.
  const selfOwner = process.getuid && process.getuid() === 0 ? {
    uid: 0,
    gid: process.getgid(),
  } : null

  // The ownership helpers are only loaded when selfOwner is set; otherwise
  // both are null and the code below skips owner inference and chown.
  const chownr = selfOwner ? promisify(require('chownr')) : null
  const inferOwner = selfOwner ? require('infer-owner') : null

  const mkdirp = require('mkdirp')
  const cacheDir = require('./util/cache-dir.js')
  // Private methods.
  // Child classes should not have to override these.
  // Users should never call them.
  //
  // These are plain (unregistered) symbols, so they can only be reached
  // through the bindings declared here.
  const _chown = Symbol('_chown')
  const _extract = Symbol('_extract')
  const _mkdir = Symbol('_mkdir')
  const _empty = Symbol('_empty')
  const _toFile = Symbol('_toFile')
  const _tarxOptions = Symbol('_tarxOptions')
  const _entryMode = Symbol('_entryMode')
  const _istream = Symbol('_istream')
  const _assertType = Symbol('_assertType')
  const _tarballFromCache = Symbol('_tarballFromCache')

  // These two come from the global symbol registry (Symbol.for), so any
  // module that asks for the same key gets the very same symbol.
  const _tarballFromResolved = Symbol.for('pacote.Fetcher._tarballFromResolved')
  const _cacheFetches = Symbol.for('pacote.Fetcher._cacheFetches')
  // Base class shared by every fetcher type.
  //
  // It owns the pieces that do not depend on where a tarball comes from:
  // option normalization, integrity bookkeeping, reading from / writing to
  // the cache, the retry logic around tarball streams, and unpacking.
  // Child classes supply `types`, `resolve()`, `packument()`, `manifest()`
  // and the `pacote.Fetcher._tarballFromResolved` method.
  class FetcherBase {
    // spec: anything npm-package-arg accepts (it is parsed with opts.where)
    // opts: required options object; it is shallow-copied, never mutated.
    // Throws a TypeError if opts is missing or not an object, or if the
    // parsed spec type is not one of the child class's `types`.
    constructor (spec, opts) {
      if (!opts || typeof opts !== 'object') {
        throw new TypeError('options object is required')
      }
      this.spec = npa(spec, opts.where)

      this.allowGitIgnore = !!opts.allowGitIgnore

      // a bit redundant because presumably the caller already knows this,
      // but it makes it easier to not have to keep track of the requested
      // spec when we're dispatching thousands of these at once, and normalizing
      // is nice.  saveSpec is preferred if set, because it turns stuff like
      // x/y#committish into github:x/y#committish.  use name@rawSpec for
      // registry deps so that we turn xyz and xyz@ -> xyz@
      this.from = this.spec.registry
        ? `${this.spec.name}@${this.spec.rawSpec}`
        : this.spec.saveSpec

      this[_assertType]()

      // clone the opts object so that others aren't upset when we mutate it
      // by adding/modifying the integrity value.
      this.opts = { ...opts }

      this.cache = opts.cache || cacheDir()
      this.resolved = opts.resolved || null

      // default to caching/verifying with sha512, that's what we usually have
      // need to change this default, or start overriding it, when sha512
      // is no longer strong enough.
      this.defaultIntegrityAlgorithm = opts.defaultIntegrityAlgorithm || 'sha512'

      // a string integrity is parsed once, up front, on our private copy
      if (typeof opts.integrity === 'string') {
        this.opts.integrity = ssri.parse(opts.integrity)
      }

      this.package = null
      this.type = this.constructor.name
      this.fmode = opts.fmode || 0o666
      this.dmode = opts.dmode || 0o777

      // we don't need a default umask, because we don't chmod files coming
      // out of package tarballs.  they're forced to have a mode that is
      // valid, regardless of what's in the tarball entry, and then we let
      // the process's umask setting do its job.  but if configured, we do
      // respect it.
      this.umask = opts.umask || 0
      this.log = opts.log || procLog

      this.preferOnline = !!opts.preferOnline
      this.preferOffline = !!opts.preferOffline
      this.offline = !!opts.offline

      this.before = opts.before
      // a `before` value always forces full metadata
      this.fullMetadata = this.before ? true : !!opts.fullMetadata

      this.defaultTag = opts.defaultTag || 'latest'
      // strip any trailing slashes from the registry url
      this.registry = (opts.registry || 'https://registry.npmjs.org')
        .replace(/\/+$/, '')

      // command to run 'prepare' scripts on directories and git dirs
      // To use pacote with yarn, for example, set npmBin to 'yarn'
      // and npmCliConfig with yarn's equivalents.
      this.npmBin = opts.npmBin || 'npm'

      // command to install deps for preparing
      this.npmInstallCmd = opts.npmInstallCmd || ['install', '--force']

      // XXX fill more of this in based on what we know from this.opts
      // we explicitly DO NOT fill in --tag, though, since we are often
      // going to be packing in the context of a publish, which may set
      // a dist-tag, but certainly wants to keep defaulting to latest.
      this.npmCliConfig = opts.npmCliConfig || [
        // note: this is the directory that contains this.cache
        `--cache=${dirname(this.cache)}`,
        `--prefer-offline=${!!this.preferOffline}`,
        `--prefer-online=${!!this.preferOnline}`,
        `--offline=${!!this.offline}`,
        // assumes `before` is a Date (toISOString is called on it)
        ...(this.before ? [`--before=${this.before.toISOString()}`] : []),
        '--no-progress',
        '--no-save',
        '--no-audit',
        // override any omit settings from the environment
        '--include=dev',
        '--include=peer',
        '--include=optional',
        // we need the actual things, not just the lockfile
        '--no-package-lock-only',
        '--no-dry-run',
      ]
    }

    // The integrity currently known for this package, or null.
    get integrity () {
      return this.opts.integrity || null
    }

    // Falsy values are ignored.  Anything else is parsed with ssri and
    // either stored (when nothing was known yet) or merged into the
    // existing value.
    set integrity (i) {
      if (!i) {
        return
      }

      i = ssri.parse(i)
      const current = this.opts.integrity

      // do not ever update an existing hash value, but do
      // merge in NEW algos and hashes that we don't already have.
      if (current) {
        current.merge(i)
      } else {
        this.opts.integrity = i
      }
    }

    // A fresh Error naming this fetcher type; created on every access.
    get notImplementedError () {
      return new Error('not implemented in this fetcher type: ' + this.type)
    }

    // override in child classes
    // Returns a Promise that resolves to this.resolved string value.
    // The base implementation only succeeds when `resolved` is already set.
    resolve () {
      return this.resolved ? Promise.resolve(this.resolved)
        : Promise.reject(this.notImplementedError)
    }

    // override in child classes; the base implementation always rejects
    packument () {
      return Promise.reject(this.notImplementedError)
    }

    // override in child class
    // returns a manifest containing:
    // - name
    // - version
    // - _resolved
    // - _integrity
    // - plus whatever else was in there (corgi, full metadata, or pj file)
    manifest () {
      return Promise.reject(this.notImplementedError)
    }

    // private, should be overridden.
    // Note that they should *not* calculate or check integrity or cache,
    // but *just* return the raw tarball data stream.
    // Unlike the methods above, the base implementation throws synchronously.
    [_tarballFromResolved] () {
      throw this.notImplementedError
    }

    // public, should not be overridden
    // Resolves to whatever `stream.concat()` yields for the tarball stream,
    // with `integrity`, `resolved` and `from` properties attached to it.
    tarball () {
      return this.tarballStream(stream => stream.concat().then(data => {
        data.integrity = this.integrity && String(this.integrity)
        data.resolved = this.resolved
        data.from = this.from
        return data
      }))
    }

    // private
    // Note: cacache will raise a EINTEGRITY error if the integrity doesn't match
    [_tarballFromCache] () {
      return cacache.get.stream.byDigest(this.cache, this.integrity, this.opts)
    }

    // Whether fetched tarballs should also be written into the cache.
    get [_cacheFetches] () {
      return true
    }

    // Wraps a raw tarball stream in an ssri integrity stream and, when
    // caching applies, tees the raw data into the cache as well.
    // Returns the integrity stream.
    [_istream] (stream) {
      // everyone will need one of these, either for verifying or calculating
      // We always set it, because we might only have a weak legacy hex
      // sha1 in the packument, and this MAY upgrade it to a stronger algo.
      // If we had an integrity, and it doesn't match, then this does not
      // override that error; the istream will raise the error before it
      // gets to the point of re-setting the integrity.
      const istream = ssri.integrityStream(this.opts)
      istream.on('integrity', i => this.integrity = i)
      // pipe() does not forward errors, so proxy them by hand
      stream.on('error', er => istream.emit('error', er))

      // if not caching this, just pipe through to the istream and return it
      // NOTE(review): this tests the caller-supplied opts.cache, not the
      // defaulted this.cache, so nothing is written when no cache option
      // was passed - confirm that is intended.
      if (!this.opts.cache || !this[_cacheFetches]) {
        return stream.pipe(istream)
      }

      // we have to return a stream that gets ALL the data, and proxies errors,
      // but then pipe from the original tarball stream into the cache as well.
      // To do this without losing any data, and since the cacache put stream
      // is not a passthrough, we have to pipe from the original stream into
      // the cache AFTER we pipe into the istream.  Since the cache stream
      // has an asynchronous flush to write its contents to disk, we need to
      // defer the istream end until the cache stream ends.
      stream.pipe(istream, { end: false })
      const cstream = cacache.put.stream(
        this.opts.cache,
        `pacote:tarball:${this.from}`,
        this.opts
      )
      stream.pipe(cstream)

      // defer istream end until after cstream
      // cache write errors should not crash the fetch, this is best-effort.
      cstream.promise().catch(() => {}).then(() => istream.end())

      return istream
    }

    // The algorithm picked from the known integrity, or the configured
    // default when no integrity is known yet.
    pickIntegrityAlgorithm () {
      return this.integrity ? this.integrity.pickAlgorithm(this.opts)
        : this.defaultIntegrityAlgorithm
    }

    // TODO: check error class, once those are rolled out to our deps
    // True for error codes EINTEGRITY and Z_DATA_ERROR.
    isDataCorruptionError (er) {
      return er.code === 'EINTEGRITY' || er.code === 'Z_DATA_ERROR'
    }

    // override the types getter
    // The base class returns undefined, which disables the type assertion.
    get types () {}

    // Throws a TypeError when `types` is defined and does not include the
    // parsed spec's type.
    [_assertType] () {
      if (this.types && !this.types.includes(this.spec.type)) {
        throw new TypeError(`Wrong spec type (${
          this.spec.type
        }) for ${
          this.constructor.name
        }. Supported types: ${this.types.join(', ')}`)
      }
    }

    // We allow ENOENTs from cacache, but not anywhere else.
    // An ENOENT trying to read a tgz file, for example, is Right Out.
    // True for data corruption errors and for codes ENOENT and EISDIR.
    isRetriableError (er) {
      // TODO: check error class, once those are rolled out to our deps
      return this.isDataCorruptionError(er) ||
        er.code === 'ENOENT' ||
        er.code === 'EISDIR'
    }

    // Mostly internal, but has some uses
    // Pass in a function which returns a promise
    // Function will be called 1 or more times with streams that may fail.
    // Retries:
    // Function MUST handle errors on the stream by rejecting the promise,
    // so that retry logic can pick it up and either retry or fail whatever
    // promise it was making (ie, failing extraction, etc.)
    //
    // The return value of this method is a Promise that resolves the same
    // as whatever the streamHandler resolves to.
    //
    // This should never be overridden by child classes, but it is public.
    tarballStream (streamHandler) {
      // Only short-circuit via cache if we have everything else we'll need,
      // and the user has not expressed a preference for checking online.
      const fromCache = (
        !this.preferOnline &&
        this.integrity &&
        this.resolved
      ) ? streamHandler(this[_tarballFromCache]()).catch(er => {
          if (this.isDataCorruptionError(er)) {
            this.log.warn('tarball', `cached data for ${
              this.spec
            } (${this.integrity}) seems to be corrupted. Refreshing cache.`)
            // drop the bad cache content, then keep failing with the same
            // error so that the fallback below takes over
            return this.cleanupCached().then(() => { throw er })
          } else {
            throw er
          }
        }) : null

      // Fallback: fetch from the resolved location.  Called with no
      // argument when the cache was not tried, or with the cache error.
      const fromResolved = er => {
        if (er) {
          // only errors that look like "not in cache / corrupted" fall back
          if (!this.isRetriableError(er)) {
            throw er
          }
          this.log.silly('tarball', `no local data for ${
            this.spec
          }. Extracting by manifest.`)
        }
        return this.resolve().then(() => retry(tryAgain =>
          streamHandler(this[_istream](this[_tarballFromResolved]()))
            .catch(er => {
              // Most likely data integrity.  A cache ENOENT error is unlikely
              // here, since we're definitely not reading from the cache, but it
              // IS possible that the fetch subsystem accessed the cache, and the
              // entry got blown away or something.  Try one more time to be sure.
              if (this.isRetriableError(er)) {
                this.log.warn('tarball', `tarball data for ${
                  this.spec
                } (${this.integrity}) seems to be corrupted. Trying again.`)
                return this.cleanupCached().then(() => tryAgain(er))
              }
              throw er
            }), { retries: 1, minTimeout: 0, maxTimeout: 0 }))
      }

      return fromCache ? fromCache.catch(fromResolved) : fromResolved()
    }

    // Removes the cached content stored under the current integrity.
    cleanupCached () {
      return cacache.rm.content(this.cache, this.integrity, this.opts)
    }

    // chownr(path, uid, gid), but only when selfOwner is set and the
    // requested owner differs from it.  Resolves to null otherwise.
    async [_chown] (path, uid, gid) {
      return selfOwner && (selfOwner.gid !== gid || selfOwner.uid !== uid)
        ? chownr(path, uid, gid)
        : /* istanbul ignore next - we don't test in root-owned folders */ null
    }

    // rimraf every entry that getContents reports for `path` at depth 1.
    [_empty] (path) {
      return getContents({ path, depth: 1 }).then(contents => Promise.all(
        contents.map(entry => rimraf(entry))))
    }

    // Empties and (re)creates `dest`.  Resolves to the inferred {uid, gid}
    // when owner inference is active, and to an empty object otherwise.
    [_mkdir] (dest) {
      // if we're bothering to do owner inference, then do it.
      // otherwise just make the dir, and return an empty object.
      // always empty the dir to start with, but do so
      // _after_ inferring the owner, in case there's an existing folder
      // there that we would want to preserve which differs from the
      // parent folder (rare, but probably happens sometimes).
      return !inferOwner
        ? this[_empty](dest).then(() => mkdirp(dest)).then(() => ({}))
        : inferOwner(dest).then(({ uid, gid }) =>
          this[_empty](dest)
            .then(() => mkdirp(dest))
            .then(made => {
              // ignore the || dest part in coverage.  It's there to handle
              // race conditions where the dir may be made by someone else
              // after being removed by us.
              const dir = made || /* istanbul ignore next */ dest
              return this[_chown](dir, uid, gid)
            })
            .then(() => ({ uid, gid })))
    }

    // extraction is always the same.  the only difference is where
    // the tarball comes from.
    // Resolves to { resolved, integrity, from }.
    extract (dest) {
      return this[_mkdir](dest).then(({ uid, gid }) =>
        this.tarballStream(tarball => this[_extract](dest, tarball, uid, gid)))
    }

    // Streams the tarball into the file at `dest`.
    // Resolves to { integrity, resolved, from } once the writer closes.
    [_toFile] (dest) {
      return this.tarballStream(str => new Promise((res, rej) => {
        const writer = new fsm.WriteStream(dest)
        // forward source errors to the writer, so that there is a single
        // place where the promise is rejected
        str.on('error', er => writer.emit('error', er))
        writer.on('error', er => rej(er))
        writer.on('close', () => res({
          integrity: this.integrity && String(this.integrity),
          resolved: this.resolved,
          from: this.from,
        }))
        str.pipe(writer)
      }))
    }

    // don't use this[_mkdir] because we don't want to rimraf anything
    // Writes the tarball to the file `dest`, creating its directory first.
    // With owner inference active, the created directory (or dirname(dest)
    // when mkdirp reports nothing made) is chowned after the write.
    tarballFile (dest) {
      const dir = dirname(dest)
      return !inferOwner
        ? mkdirp(dir).then(() => this[_toFile](dest))
        : inferOwner(dest).then(({ uid, gid }) =>
          mkdirp(dir).then(made => this[_toFile](dest)
            .then(res => this[_chown](made || dir, uid, gid)
              .then(() => res))))
    }

    // Pipes `tarball` into a tar extractor rooted at `dest`.
    // Resolves to { resolved, integrity, from } on the extractor's 'end',
    // and rejects on an error from either the extractor or the tarball.
    [_extract] (dest, tarball, uid, gid) {
      const extractor = tar.x(this[_tarxOptions]({ cwd: dest, uid, gid }))
      const p = new Promise((resolve, reject) => {
        extractor.on('end', () => {
          resolve({
            resolved: this.resolved,
            integrity: this.integrity && String(this.integrity),
            from: this.from,
          })
        })

        extractor.on('error', er => {
          this.log.warn('tar', er.message)
          this.log.silly('tar', er)
          reject(er)
        })

        tarball.on('error', er => reject(er))
      })

      tarball.pipe(extractor)
      return p
    }

    // always ensure that entries are at least as permissive as our configured
    // dmode/fmode, but never more permissive than the umask allows.
    // Note that the bin-executable bits and owner read/write are OR'd in
    // after the umask has been applied.
    [_entryMode] (path, mode, type) {
      const m = /Directory|GNUDumpDir/.test(type) ? this.dmode
        : /File$/.test(type) ? this.fmode
        : /* istanbul ignore next - should never happen in a pkg */ 0

      // make sure package bins are executable
      const exe = isPackageBin(this.package, path) ? 0o111 : 0
      // always ensure that files are read/writable by the owner
      return ((mode | m) & ~this.umask) | exe | 0o600
    }

    // Builds the options object handed to tar.x for an extraction into
    // `cwd`, owned by uid/gid.
    [_tarxOptions] ({ cwd, uid, gid }) {
      // paths of every .npmignore file seen so far in this extraction
      const sawIgnores = new Set()
      return {
        cwd,
        noChmod: true,
        noMtime: true,
        filter: (name, entry) => {
          // never unpack links of any kind
          if (/Link$/.test(entry.type)) {
            return false
          }

          entry.mode = this[_entryMode](entry.path, entry.mode, entry.type)
          // this replicates the npm pack behavior where .gitignore files
          // are treated like .npmignore files, but only if a .npmignore
          // file is not present.
          if (/File$/.test(entry.type)) {
            const base = basename(entry.path)
            if (base === '.npmignore') {
              sawIgnores.add(entry.path)
            } else if (base === '.gitignore' && !this.allowGitIgnore) {
              // rename, but only if there's not already a .npmignore
              const ni = entry.path.replace(/\.gitignore$/, '.npmignore')
              if (sawIgnores.has(ni)) {
                return false
              }
              entry.path = ni
            }
            return true
          }
          // NOTE(review): entries that are neither links nor files (for
          // example directories) fall through and return undefined -
          // confirm that tar skipping them is intended.
        },
        strip: 1,
        onwarn: /* istanbul ignore next - we can trust that tar logs */
        (code, msg, data) => {
          this.log.warn('tar', code, msg)
          this.log.silly('tar', code, msg, data)
        },
        uid,
        gid,
        umask: this.umask,
      }
    }
  }
  module.exports = FetcherBase

  // Child classes
  // These are loaded only after module.exports has been assigned above.
  // NOTE(review): presumably each child module requires this file to
  // extend the base class, which would make the ordering significant -
  // confirm before moving these to the top of the file.
  const GitFetcher = require('./git.js')
  const RegistryFetcher = require('./registry.js')
  const FileFetcher = require('./file.js')
  const DirFetcher = require('./dir.js')
  const RemoteFetcher = require('./remote.js')
  // Get an appropriate fetcher object from a spec and options
  //
  // rawSpec is parsed with npm-package-arg (using opts.where), and the
  // parsed spec's `type` picks the fetcher class:
  //   git                        -> GitFetcher
  //   remote                     -> RemoteFetcher
  //   version, range, tag, alias -> RegistryFetcher
  //   file                       -> FileFetcher
  //   directory                  -> DirFetcher
  // Throws a TypeError for any other type.
  FetcherBase.get = (rawSpec, opts = {}) => {
    const spec = npa(rawSpec, opts.where)
    switch (spec.type) {
      case 'git':
        return new GitFetcher(spec, opts)

      case 'remote':
        return new RemoteFetcher(spec, opts)

      case 'version':
      case 'range':
      case 'tag':
      case 'alias':
        // use the parsed spec's subSpec when it has one, else the spec itself
        return new RegistryFetcher(spec.subSpec || spec, opts)

      case 'file':
        return new FileFetcher(spec, opts)

      case 'directory':
        return new DirFetcher(spec, opts)

      default:
        throw new TypeError('Unknown spec type: ' + spec.type)
    }
  }
  447. }