123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509 |
- // This is the base class that the other fetcher types in lib
- // all descend from.
- // It handles the unpacking and retry logic that is shared among
- // all of the other Fetcher types.
- const npa = require('npm-package-arg')
- const ssri = require('ssri')
- const { promisify } = require('util')
- const { basename, dirname } = require('path')
- const rimraf = promisify(require('rimraf'))
- const tar = require('tar')
- const procLog = require('./util/proc-log.js')
- const retry = require('promise-retry')
- const fsm = require('fs-minipass')
- const cacache = require('cacache')
- const isPackageBin = require('./util/is-package-bin.js')
- const getContents = require('@npmcli/installed-package-contents')
- // we only change ownership on unix platforms, and only if uid is 0
- const selfOwner = process.getuid && process.getuid() === 0 ? {
- uid: 0,
- gid: process.getgid(),
- } : null
- const chownr = selfOwner ? promisify(require('chownr')) : null
- const inferOwner = selfOwner ? require('infer-owner') : null
- const mkdirp = require('mkdirp')
- const cacheDir = require('./util/cache-dir.js')
- // Private methods.
- // Child classes should not have to override these.
- // Users should never call them.
- const _chown = Symbol('_chown')
- const _extract = Symbol('_extract')
- const _mkdir = Symbol('_mkdir')
- const _empty = Symbol('_empty')
- const _toFile = Symbol('_toFile')
- const _tarxOptions = Symbol('_tarxOptions')
- const _entryMode = Symbol('_entryMode')
- const _istream = Symbol('_istream')
- const _assertType = Symbol('_assertType')
- const _tarballFromCache = Symbol('_tarballFromCache')
- const _tarballFromResolved = Symbol.for('pacote.Fetcher._tarballFromResolved')
- const _cacheFetches = Symbol.for('pacote.Fetcher._cacheFetches')
- class FetcherBase {
- constructor (spec, opts) {
- if (!opts || typeof opts !== 'object')
- throw new TypeError('options object is required')
- this.spec = npa(spec, opts.where)
- this.allowGitIgnore = !!opts.allowGitIgnore
- // a bit redundant because presumably the caller already knows this,
- // but it makes it easier to not have to keep track of the requested
- // spec when we're dispatching thousands of these at once, and normalizing
- // is nice. saveSpec is preferred if set, because it turns stuff like
- // x/y#committish into github:x/y#committish. use name@rawSpec for
- // registry deps so that we turn xyz and xyz@ -> xyz@
- this.from = this.spec.registry
- ? `${this.spec.name}@${this.spec.rawSpec}` : this.spec.saveSpec
- this[_assertType]()
- // clone the opts object so that others aren't upset when we mutate it
- // by adding/modifying the integrity value.
- this.opts = {...opts}
- this.cache = opts.cache || cacheDir()
- this.resolved = opts.resolved || null
- // default to caching/verifying with sha512, that's what we usually have
- // need to change this default, or start overriding it, when sha512
- // is no longer strong enough.
- this.defaultIntegrityAlgorithm = opts.defaultIntegrityAlgorithm || 'sha512'
- if (typeof opts.integrity === 'string')
- this.opts.integrity = ssri.parse(opts.integrity)
- this.package = null
- this.type = this.constructor.name
- this.fmode = opts.fmode || 0o666
- this.dmode = opts.dmode || 0o777
- // we don't need a default umask, because we don't chmod files coming
- // out of package tarballs. they're forced to have a mode that is
- // valid, regardless of what's in the tarball entry, and then we let
- // the process's umask setting do its job. but if configured, we do
- // respect it.
- this.umask = opts.umask || 0
- this.log = opts.log || procLog
- this.preferOnline = !!opts.preferOnline
- this.preferOffline = !!opts.preferOffline
- this.offline = !!opts.offline
- this.before = opts.before
- this.fullMetadata = this.before ? true : !!opts.fullMetadata
- this.defaultTag = opts.defaultTag || 'latest'
- this.registry = (opts.registry || 'https://registry.npmjs.org')
- .replace(/\/+$/, '')
- // command to run 'prepare' scripts on directories and git dirs
- // To use pacote with yarn, for example, set npmBin to 'yarn'
- // and npmCliConfig with yarn's equivalents.
- this.npmBin = opts.npmBin || 'npm'
- // command to install deps for preparing
- this.npmInstallCmd = opts.npmInstallCmd || [ 'install', '--force' ]
- // XXX fill more of this in based on what we know from this.opts
- // we explicitly DO NOT fill in --tag, though, since we are often
- // going to be packing in the context of a publish, which may set
- // a dist-tag, but certainly wants to keep defaulting to latest.
- this.npmCliConfig = opts.npmCliConfig || [
- `--cache=${dirname(this.cache)}`,
- `--prefer-offline=${!!this.preferOffline}`,
- `--prefer-online=${!!this.preferOnline}`,
- `--offline=${!!this.offline}`,
- ...(this.before ? [`--before=${this.before.toISOString()}`] : []),
- '--no-progress',
- '--no-save',
- '--no-audit',
- // override any omit settings from the environment
- '--include=dev',
- '--include=peer',
- '--include=optional',
- // we need the actual things, not just the lockfile
- '--no-package-lock-only',
- '--no-dry-run',
- ]
- }
- get integrity () {
- return this.opts.integrity || null
- }
- set integrity (i) {
- if (!i)
- return
- i = ssri.parse(i)
- const current = this.opts.integrity
- // do not ever update an existing hash value, but do
- // merge in NEW algos and hashes that we don't already have.
- if (current)
- current.merge(i)
- else
- this.opts.integrity = i
- }
- get notImplementedError () {
- return new Error('not implemented in this fetcher type: ' + this.type)
- }
- // override in child classes
- // Returns a Promise that resolves to this.resolved string value
- resolve () {
- return this.resolved ? Promise.resolve(this.resolved)
- : Promise.reject(this.notImplementedError)
- }
- packument () {
- return Promise.reject(this.notImplementedError)
- }
- // override in child class
- // returns a manifest containing:
- // - name
- // - version
- // - _resolved
- // - _integrity
- // - plus whatever else was in there (corgi, full metadata, or pj file)
- manifest () {
- return Promise.reject(this.notImplementedError)
- }
- // private, should be overridden.
- // Note that they should *not* calculate or check integrity or cache,
- // but *just* return the raw tarball data stream.
- [_tarballFromResolved] () {
- throw this.notImplementedError
- }
- // public, should not be overridden
- tarball () {
- return this.tarballStream(stream => stream.concat().then(data => {
- data.integrity = this.integrity && String(this.integrity)
- data.resolved = this.resolved
- data.from = this.from
- return data
- }))
- }
- // private
- // Note: cacache will raise a EINTEGRITY error if the integrity doesn't match
- [_tarballFromCache] () {
- return cacache.get.stream.byDigest(this.cache, this.integrity, this.opts)
- }
- get [_cacheFetches] () {
- return true
- }
- [_istream] (stream) {
- // everyone will need one of these, either for verifying or calculating
- // We always set it, because we have might only have a weak legacy hex
- // sha1 in the packument, and this MAY upgrade it to a stronger algo.
- // If we had an integrity, and it doesn't match, then this does not
- // override that error; the istream will raise the error before it
- // gets to the point of re-setting the integrity.
- const istream = ssri.integrityStream(this.opts)
- istream.on('integrity', i => this.integrity = i)
- stream.on('error', er => istream.emit('error', er))
- // if not caching this, just pipe through to the istream and return it
- if (!this.opts.cache || !this[_cacheFetches])
- return stream.pipe(istream)
- // we have to return a stream that gets ALL the data, and proxies errors,
- // but then pipe from the original tarball stream into the cache as well.
- // To do this without losing any data, and since the cacache put stream
- // is not a passthrough, we have to pipe from the original stream into
- // the cache AFTER we pipe into the istream. Since the cache stream
- // has an asynchronous flush to write its contents to disk, we need to
- // defer the istream end until the cache stream ends.
- stream.pipe(istream, { end: false })
- const cstream = cacache.put.stream(
- this.opts.cache,
- `pacote:tarball:${this.from}`,
- this.opts
- )
- stream.pipe(cstream)
- // defer istream end until after cstream
- // cache write errors should not crash the fetch, this is best-effort.
- cstream.promise().catch(() => {}).then(() => istream.end())
- return istream
- }
- pickIntegrityAlgorithm () {
- return this.integrity ? this.integrity.pickAlgorithm(this.opts)
- : this.defaultIntegrityAlgorithm
- }
- // TODO: check error class, once those are rolled out to our deps
- isDataCorruptionError (er) {
- return er.code === 'EINTEGRITY' || er.code === 'Z_DATA_ERROR'
- }
- // override the types getter
- get types () {}
- [_assertType] () {
- if (this.types && !this.types.includes(this.spec.type)) {
- throw new TypeError(`Wrong spec type (${
- this.spec.type
- }) for ${
- this.constructor.name
- }. Supported types: ${this.types.join(', ')}`)
- }
- }
- // We allow ENOENTs from cacache, but not anywhere else.
- // An ENOENT trying to read a tgz file, for example, is Right Out.
- isRetriableError (er) {
- // TODO: check error class, once those are rolled out to our deps
- return this.isDataCorruptionError(er) ||
- er.code === 'ENOENT' ||
- er.code === 'EISDIR'
- }
- // Mostly internal, but has some uses
- // Pass in a function which returns a promise
- // Function will be called 1 or more times with streams that may fail.
- // Retries:
- // Function MUST handle errors on the stream by rejecting the promise,
- // so that retry logic can pick it up and either retry or fail whatever
- // promise it was making (ie, failing extraction, etc.)
- //
- // The return value of this method is a Promise that resolves the same
- // as whatever the streamHandler resolves to.
- //
- // This should never be overridden by child classes, but it is public.
- tarballStream (streamHandler) {
- // Only short-circuit via cache if we have everything else we'll need,
- // and the user has not expressed a preference for checking online.
- const fromCache = (
- !this.preferOnline &&
- this.integrity &&
- this.resolved
- ) ? streamHandler(this[_tarballFromCache]()).catch(er => {
- if (this.isDataCorruptionError(er)) {
- this.log.warn('tarball', `cached data for ${
- this.spec
- } (${this.integrity}) seems to be corrupted. Refreshing cache.`)
- return this.cleanupCached().then(() => { throw er })
- } else {
- throw er
- }
- }) : null
- const fromResolved = er => {
- if (er) {
- if (!this.isRetriableError(er))
- throw er
- this.log.silly('tarball', `no local data for ${
- this.spec
- }. Extracting by manifest.`)
- }
- return this.resolve().then(() => retry(tryAgain =>
- streamHandler(this[_istream](this[_tarballFromResolved]()))
- .catch(er => {
- // Most likely data integrity. A cache ENOENT error is unlikely
- // here, since we're definitely not reading from the cache, but it
- // IS possible that the fetch subsystem accessed the cache, and the
- // entry got blown away or something. Try one more time to be sure.
- if (this.isRetriableError(er)) {
- this.log.warn('tarball', `tarball data for ${
- this.spec
- } (${this.integrity}) seems to be corrupted. Trying again.`)
- return this.cleanupCached().then(() => tryAgain(er))
- }
- throw er
- }), { retries: 1, minTimeout: 0, maxTimeout: 0 }))
- }
- return fromCache ? fromCache.catch(fromResolved) : fromResolved()
- }
- cleanupCached () {
- return cacache.rm.content(this.cache, this.integrity, this.opts)
- }
- async [_chown] (path, uid, gid) {
- return selfOwner && (selfOwner.gid !== gid || selfOwner.uid !== uid)
- ? chownr(path, uid, gid)
- : /* istanbul ignore next - we don't test in root-owned folders */ null
- }
- [_empty] (path) {
- return getContents({path, depth: 1}).then(contents => Promise.all(
- contents.map(entry => rimraf(entry))))
- }
- [_mkdir] (dest) {
- // if we're bothering to do owner inference, then do it.
- // otherwise just make the dir, and return an empty object.
- // always empty the dir dir to start with, but do so
- // _after_ inferring the owner, in case there's an existing folder
- // there that we would want to preserve which differs from the
- // parent folder (rare, but probably happens sometimes).
- return !inferOwner
- ? this[_empty](dest).then(() => mkdirp(dest)).then(() => ({}))
- : inferOwner(dest).then(({uid, gid}) =>
- this[_empty](dest)
- .then(() => mkdirp(dest))
- .then(made => {
- // ignore the || dest part in coverage. It's there to handle
- // race conditions where the dir may be made by someone else
- // after being removed by us.
- const dir = made || /* istanbul ignore next */ dest
- return this[_chown](dir, uid, gid)
- })
- .then(() => ({uid, gid})))
- }
- // extraction is always the same. the only difference is where
- // the tarball comes from.
- extract (dest) {
- return this[_mkdir](dest).then(({uid, gid}) =>
- this.tarballStream(tarball => this[_extract](dest, tarball, uid, gid)))
- }
- [_toFile] (dest) {
- return this.tarballStream(str => new Promise((res, rej) => {
- const writer = new fsm.WriteStream(dest)
- str.on('error', er => writer.emit('error', er))
- writer.on('error', er => rej(er))
- writer.on('close', () => res({
- integrity: this.integrity && String(this.integrity),
- resolved: this.resolved,
- from: this.from,
- }))
- str.pipe(writer)
- }))
- }
- // don't use this[_mkdir] because we don't want to rimraf anything
- tarballFile (dest) {
- const dir = dirname(dest)
- return !inferOwner
- ? mkdirp(dir).then(() => this[_toFile](dest))
- : inferOwner(dest).then(({uid, gid}) =>
- mkdirp(dir).then(made => this[_toFile](dest)
- .then(res => this[_chown](made || dir, uid, gid)
- .then(() => res))))
- }
- [_extract] (dest, tarball, uid, gid) {
- const extractor = tar.x(this[_tarxOptions]({ cwd: dest, uid, gid }))
- const p = new Promise((resolve, reject) => {
- extractor.on('end', () => {
- resolve({
- resolved: this.resolved,
- integrity: this.integrity && String(this.integrity),
- from: this.from,
- })
- })
- extractor.on('error', er => {
- this.log.warn('tar', er.message)
- this.log.silly('tar', er)
- reject(er)
- })
- tarball.on('error', er => reject(er))
- })
- tarball.pipe(extractor)
- return p
- }
- // always ensure that entries are at least as permissive as our configured
- // dmode/fmode, but never more permissive than the umask allows.
- [_entryMode] (path, mode, type) {
- const m = /Directory|GNUDumpDir/.test(type) ? this.dmode
- : /File$/.test(type) ? this.fmode
- : /* istanbul ignore next - should never happen in a pkg */ 0
- // make sure package bins are executable
- const exe = isPackageBin(this.package, path) ? 0o111 : 0
- // always ensure that files are read/writable by the owner
- return ((mode | m) & ~this.umask) | exe | 0o600
- }
- [_tarxOptions] ({ cwd, uid, gid }) {
- const sawIgnores = new Set()
- return {
- cwd,
- noChmod: true,
- noMtime: true,
- filter: (name, entry) => {
- if (/Link$/.test(entry.type))
- return false
- entry.mode = this[_entryMode](entry.path, entry.mode, entry.type)
- // this replicates the npm pack behavior where .gitignore files
- // are treated like .npmignore files, but only if a .npmignore
- // file is not present.
- if (/File$/.test(entry.type)) {
- const base = basename(entry.path)
- if (base === '.npmignore')
- sawIgnores.add(entry.path)
- else if (base === '.gitignore' && !this.allowGitIgnore) {
- // rename, but only if there's not already a .npmignore
- const ni = entry.path.replace(/\.gitignore$/, '.npmignore')
- if (sawIgnores.has(ni))
- return false
- entry.path = ni
- }
- return true
- }
- },
- strip: 1,
- onwarn: /* istanbul ignore next - we can trust that tar logs */
- (code, msg, data) => {
- this.log.warn('tar', code, msg)
- this.log.silly('tar', code, msg, data)
- },
- uid,
- gid,
- umask: this.umask,
- }
- }
- }
- module.exports = FetcherBase
- // Child classes
- const GitFetcher = require('./git.js')
- const RegistryFetcher = require('./registry.js')
- const FileFetcher = require('./file.js')
- const DirFetcher = require('./dir.js')
- const RemoteFetcher = require('./remote.js')
- // Get an appropriate fetcher object from a spec and options
- FetcherBase.get = (rawSpec, opts = {}) => {
- const spec = npa(rawSpec, opts.where)
- switch (spec.type) {
- case 'git':
- return new GitFetcher(spec, opts)
- case 'remote':
- return new RemoteFetcher(spec, opts)
- case 'version':
- case 'range':
- case 'tag':
- case 'alias':
- return new RegistryFetcher(spec.subSpec || spec, opts)
- case 'file':
- return new FileFetcher(spec, opts)
- case 'directory':
- return new DirFetcher(spec, opts)
- default:
- throw new TypeError('Unknown spec type: ' + spec.type)
- }
- }
|