entry.js 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460
  1. const { Request, Response } = require('minipass-fetch')
  2. const Minipass = require('minipass')
  3. const MinipassCollect = require('minipass-collect')
  4. const MinipassFlush = require('minipass-flush')
  5. const MinipassPipeline = require('minipass-pipeline')
  6. const cacache = require('cacache')
  7. const url = require('url')
  8. const CachePolicy = require('./policy.js')
  9. const cacheKey = require('./key.js')
  10. const remote = require('../remote.js')
  // own-property test that does not rely on the object's own (possibly
  // missing or shadowed) `hasOwnProperty` method
  const hasOwnProperty = (obj, prop) => {
    return Reflect.apply(Object.prototype.hasOwnProperty, obj, [prop])
  }
  // upper bound on how much response data is buffered in memory;
  // anything that is not known to be smaller than this is streamed instead
  const MAX_MEM_SIZE = 5 * 1024 ** 2 // 5MB

  // request headers that are always copied into the cache index when present.
  // request headers named by a response's vary header are stored in addition
  // to these.
  const KEEP_REQUEST_HEADERS = [
    'accept-charset', 'accept-encoding', 'accept-language',
    'accept', 'cache-control',
  ]

  // response headers that are copied into the cache index when present.
  // the real response's age header is deliberately absent: if it were stored,
  // a cache policy loaded from the metadata would consider the cached
  // response to be always stale
  const KEEP_RESPONSE_HEADERS = [
    'cache-control', 'content-encoding', 'content-language', 'content-type',
    'date', 'etag', 'expires', 'last-modified',
    'location', 'pragma', 'vary',
  ]
  /**
   * Build the metadata object that is written to the cache index for a
   * request/response pair.
   *
   * @param {object} request - request whose `url` and `headers` are recorded
   * @param {object} response - response whose `status` and `headers` are recorded
   * @param {object} options - fetch options; only `compress` is read here
   * @returns {object} `{ time, url, reqHeaders, resHeaders }` plus `status`
   *   when the response status is neither 200 nor 304
   */
  const getMetadata = (request, response, options) => {
    const metadata = {
      time: Date.now(),
      url: request.url,
      reqHeaders: {},
      resHeaders: {},
    }

    // only save the status if it's not a 200 or 304
    if (response.status !== 200 && response.status !== 304) {
      metadata.status = response.status
    }

    for (const name of KEEP_REQUEST_HEADERS) {
      if (request.headers.has(name)) {
        metadata.reqHeaders[name] = request.headers.get(name)
      }
    }

    // if the request's host header differs from the host in the url
    // we need to keep it, otherwise it's just noise and we ignore it
    const host = request.headers.get('host')
    const parsedUrl = new url.URL(request.url)
    if (host && parsedUrl.host !== host) {
      metadata.reqHeaders.host = host
    }

    // if the response has a vary header, make sure
    // we store the relevant request headers too
    if (response.headers.has('vary')) {
      const vary = response.headers.get('vary')
      // a vary of "*" means every header causes a different response.
      // in that scenario, we do not include any additional headers
      // as the freshness check will always fail anyway and we don't
      // want to bloat the cache indexes
      if (vary !== '*') {
        // copy any other request headers that will vary the response.
        // empty elements (e.g. from a trailing or doubled comma) are dropped
        // so that an empty string is never used as a header name
        const varyHeaders = vary.trim().toLowerCase().split(/\s*,\s*/).filter(Boolean)
        for (const name of varyHeaders) {
          // explicitly ignore accept-encoding here
          if (name !== 'accept-encoding' && request.headers.has(name)) {
            metadata.reqHeaders[name] = request.headers.get(name)
          }
        }
      }
    }

    for (const name of KEEP_RESPONSE_HEADERS) {
      if (response.headers.has(name)) {
        metadata.resHeaders[name] = response.headers.get(name)
      }
    }

    // when the user has disabled automatic compression and decompression the
    // stored body is exactly what the server sent, so accept-encoding and
    // content-encoding describe it truthfully and are recorded.
    // they are only written when actually present: writing the result of
    // get() for a missing header would put a null value in the index, and
    // the mere existence of that key stops revalidate() from carrying the
    // previous entry's content-encoding over after a 304
    if (options.compress === false) {
      if (request.headers.has('accept-encoding')) {
        metadata.reqHeaders['accept-encoding'] = request.headers.get('accept-encoding')
      }
      if (response.headers.has('content-encoding')) {
        metadata.resHeaders['content-encoding'] = response.headers.get('content-encoding')
      }
    }

    return metadata
  }
  // private symbol keys for the request, response and policy objects that
  // CacheEntry keeps behind getters, which may build them lazily
  const [_request, _response, _policy] = ['request', 'response', 'policy']
    .map((label) => Symbol(label))
  /**
   * A single record of the HTTP cache. An instance is created either from an
   * index entry read out of cacache (`entry`), or from a live
   * `request`/`response` pair that may be written to the cache.
   */
  class CacheEntry {
    /**
     * @param {object} args
     * @param {object} [args.entry] - index entry read from cacache; when given
     *   its `key` is used and `metadata.time` is back-filled if missing
     * @param {object} [args.request] - request; used to compute the cache key
     *   when no `entry` is given
     * @param {object} [args.response] - response belonging to `request`
     * @param {object} args.options - fetch options (cachePath, memoize, ...)
     */
    constructor ({ entry, request, response, options }) {
      if (entry) {
        this.key = entry.key
        this.entry = entry
        // previous versions of this module didn't write an explicit timestamp in
        // the metadata, so fall back to the entry's timestamp. we can't use the
        // entry timestamp to determine staleness because cacache will update it
        // when it verifies its data
        this.entry.metadata.time = this.entry.metadata.time || this.entry.time
      } else {
        this.key = cacheKey(request)
      }

      this.options = options

      // these properties are behind getters that lazily evaluate
      this[_request] = request
      this[_response] = response
      this[_policy] = null
    }

    /**
     * Look up the cache entry that satisfies `request`.
     *
     * @param {object} request - the request to match
     * @param {object} options - fetch options; reads `cachePath` and `cache`
     * @returns {Promise<CacheEntry|undefined>} the first satisfying entry, or
     *   undefined when none satisfies, when compacting the index fails, or
     *   when `options.cache` is 'reload'
     */
    static async find (request, options) {
      let matches
      try {
        // compacts the index and returns an array of unique entries
        matches = await cacache.index.compact(options.cachePath, cacheKey(request), (A, B) => {
          const entryA = new CacheEntry({ entry: A, options })
          const entryB = new CacheEntry({ entry: B, options })
          return entryA.policy.satisfies(entryB.request)
        }, {
          validateEntry: (entry) => {
            // if an integrity is null, it needs to have a status specified
            if (entry.integrity === null) {
              return !!(entry.metadata && entry.metadata.status)
            }

            return true
          },
        })
      } catch (err) {
        // if the compact request fails, ignore the error and return
        return
      }

      // a cache mode of 'reload' means to behave as though we have no cache
      // on the way to the network. return undefined to allow cacheFetch to
      // create a brand new request no matter what.
      if (options.cache === 'reload') {
        return
      }

      // find the specific entry that satisfies the request
      for (const entry of matches) {
        const candidate = new CacheEntry({ entry, options })
        if (candidate.policy.satisfies(request)) {
          return candidate
        }
      }

      return undefined
    }

    /**
     * Remove the index entry for the request's cache key. Called when the
     * user made a PUT/POST/PATCH so the cache for the same url is dropped.
     * Failures are ignored on purpose.
     *
     * @param {object} request - request whose cache key is invalidated
     * @param {object} options - fetch options; reads `cachePath`
     * @returns {Promise<void>}
     */
    static async invalidate (request, options) {
      const key = cacheKey(request)
      try {
        await cacache.rm.entry(options.cachePath, key, { removeFully: true })
      } catch (err) {
        // ignore errors
      }
    }

    /**
     * The request for this entry; when none was supplied it is rebuilt as a
     * GET from the url and request headers stored in the entry metadata.
     */
    get request () {
      if (!this[_request]) {
        this[_request] = new Request(this.entry.metadata.url, {
          method: 'GET',
          headers: this.entry.metadata.reqHeaders,
        })
      }

      return this[_request]
    }

    /**
     * The response for this entry; when none was supplied a body-less one is
     * rebuilt from the entry metadata, with content-length set to the entry
     * size and the status defaulting to 200.
     */
    get response () {
      if (!this[_response]) {
        this[_response] = new Response(null, {
          url: this.entry.metadata.url,
          counter: this.options.counter,
          status: this.entry.metadata.status || 200,
          headers: {
            ...this.entry.metadata.resHeaders,
            'content-length': this.entry.size,
          },
        })
      }

      return this[_response]
    }

    /**
     * The CachePolicy for this entry, created on first access.
     */
    get policy () {
      if (!this[_policy]) {
        this[_policy] = new CachePolicy({
          entry: this.entry,
          request: this.request,
          response: this.response,
          options: this.options,
        })
      }

      return this[_policy]
    }

    /**
     * Wrap the response in a pipeline that stores the data in the cache while
     * the user consumes it.
     *
     * @param {string} status - value for the x-local-cache-status header
     * @returns {Promise<object>} the untouched response (marked 'skip') when
     *   it must not be stored, otherwise a new Response around the caching
     *   body (null body for redirects, which are stored as metadata only)
     */
    async store (status) {
      // if the request is not a GET, the status is other than 200, 301, or
      // 308, or the CachePolicy forbids storage, append the cache status
      // header and return the response untouched
      if (this.request.method !== 'GET' || ![200, 301, 308].includes(this.response.status) || !this.policy.storable()) {
        this.response.headers.set('x-local-cache-status', 'skip')
        return this.response
      }

      const size = this.response.headers.get('content-length')
      const fitsInMemory = !!size && Number(size) < MAX_MEM_SIZE
      const shouldBuffer = this.options.memoize !== false && fitsInMemory
      const cacheOpts = {
        algorithms: this.options.algorithms,
        metadata: getMetadata(this.request, this.response, this.options),
        size,
        memoize: fitsInMemory && this.options.memoize,
      }

      let body = null
      // we only set a body if the status is a 200, redirects are
      // stored as metadata only
      if (this.response.status === 200) {
        let cacheWriteResolve, cacheWriteReject
        const cacheWritePromise = new Promise((resolve, reject) => {
          cacheWriteResolve = resolve
          cacheWriteReject = reject
        })

        // the flush stream holds back the end of the body until the
        // cache write has settled
        body = new MinipassPipeline(new MinipassFlush({
          flush () {
            return cacheWritePromise
          },
        }))

        let abortStream, onResume
        if (shouldBuffer) {
          // if the result fits in memory, use a collect stream to gather
          // the response and write it to cacache while also passing it through
          // to the user
          onResume = () => {
            const collector = new MinipassCollect.PassThrough()
            abortStream = collector
            collector.on('collect', (data) => {
              // TODO if the cache write fails, log a warning but return the response anyway
              cacache.put(this.options.cachePath, this.key, data, cacheOpts).then(cacheWriteResolve, cacheWriteReject)
            })
            body.unshift(collector)
            body.unshift(this.response.body)
          }
        } else {
          // if it does not fit in memory, create a tee stream and use
          // that to pipe to both the cache and the user simultaneously
          onResume = () => {
            const tee = new Minipass()
            const cacheStream = cacache.put.stream(this.options.cachePath, this.key, cacheOpts)
            abortStream = cacheStream
            tee.pipe(cacheStream)
            // TODO if the cache write fails, log a warning but return the response anyway
            cacheStream.promise().then(cacheWriteResolve, cacheWriteReject)
            body.unshift(tee)
            body.unshift(this.response.body)
          }
        }

        body.once('resume', onResume)
        body.once('end', () => body.removeListener('resume', onResume))
        this.response.body.on('error', (err) => {
          // the abortStream will either be a MinipassCollect if we buffer
          // or a cacache write stream, either way be sure to listen for
          // errors from the actual response and avoid writing data that we
          // know to be invalid to the cache.
          // it is only assigned once the body has been resumed; before that
          // nothing is being written, so there is nothing to abort
          if (abortStream) {
            abortStream.destroy(err)
          }
        })
      } else {
        await cacache.index.insert(this.options.cachePath, this.key, null, cacheOpts)
      }

      // note: we do not set the x-local-cache-hash header because we do not know
      // the hash value until after the write to the cache completes, which doesn't
      // happen until after the response has been sent and it's too late to write
      // the header anyway
      this.response.headers.set('x-local-cache', encodeURIComponent(this.options.cachePath))
      this.response.headers.set('x-local-cache-key', encodeURIComponent(this.key))
      this.response.headers.set('x-local-cache-mode', shouldBuffer ? 'buffer' : 'stream')
      this.response.headers.set('x-local-cache-status', status)
      this.response.headers.set('x-local-cache-time', new Date().toISOString())
      const newResponse = new Response(body, {
        url: this.response.url,
        status: this.response.status,
        headers: this.response.headers,
        counter: this.options.counter,
      })
      return newResponse
    }

    /**
     * Use the cached data to create a response and return it.
     *
     * @param {string} method - request method; 'HEAD' gets a body-less response
     * @param {object} options - fetch options; reads `counter`
     * @param {string} status - value for the x-local-cache-status header
     * @returns {Promise<object>} a response carrying the x-local-cache-* headers
     */
    async respond (method, options, status) {
      let response
      const size = Number(this.response.headers.get('content-length'))
      const fitsInMemory = !!size && size < MAX_MEM_SIZE
      const shouldBuffer = this.options.memoize !== false && fitsInMemory

      if (method === 'HEAD' || [301, 308].includes(this.response.status)) {
        // if the request is a HEAD, or the response is a redirect,
        // then the metadata in the entry already includes everything
        // we need to build a response
        response = this.response
      } else {
        // we're responding with a full cached response, so create a body
        // that reads from cacache and attach it to a new Response
        const body = new Minipass()
        const removeOnResume = () => body.removeListener('resume', onResume)

        // best-effort cleanup after a failed read: corrupt content is removed
        // and the index entry is invalidated when the content is corrupt or
        // missing. a failure of the cleanup itself is ignored so that the
        // original read error is always the one delivered to the consumer
        const discardBrokenContent = async (err) => {
          try {
            if (err.code === 'EINTEGRITY') {
              await cacache.rm.content(this.options.cachePath, this.entry.integrity, { memoize: this.options.memoize })
            }
            if (err.code === 'ENOENT' || err.code === 'EINTEGRITY') {
              await CacheEntry.invalidate(this.request, this.options)
            }
          } catch (cleanupErr) {
            // ignore: the read error below is what matters to the caller
          }
        }

        let onResume
        if (shouldBuffer) {
          // small enough to hold in memory: read the content in one go
          onResume = async () => {
            removeOnResume()
            try {
              const content = await cacache.get.byDigest(this.options.cachePath, this.entry.integrity, { memoize: this.options.memoize })
              body.end(content)
            } catch (err) {
              await discardBrokenContent(err)
              body.emit('error', err)
            }
          }
        } else {
          // otherwise stream the content out of the cache
          onResume = () => {
            const cacheStream = cacache.get.stream.byDigest(this.options.cachePath, this.entry.integrity, { memoize: this.options.memoize })
            cacheStream.on('error', async (err) => {
              cacheStream.pause()
              await discardBrokenContent(err)
              body.emit('error', err)
              cacheStream.resume()
            })
            cacheStream.pipe(body)
          }
        }

        body.once('resume', onResume)
        body.once('end', removeOnResume)
        response = new Response(body, {
          url: this.entry.metadata.url,
          counter: options.counter,
          status: 200,
          headers: {
            ...this.policy.responseHeaders(),
          },
        })
      }

      response.headers.set('x-local-cache', encodeURIComponent(this.options.cachePath))
      response.headers.set('x-local-cache-hash', encodeURIComponent(this.entry.integrity))
      response.headers.set('x-local-cache-key', encodeURIComponent(this.key))
      response.headers.set('x-local-cache-mode', shouldBuffer ? 'buffer' : 'stream')
      response.headers.set('x-local-cache-status', status)
      response.headers.set('x-local-cache-time', new Date(this.entry.metadata.time).toUTCString())
      return response
    }

    /**
     * Use the provided request along with this cache entry to revalidate the
     * stored response.
     *
     * @param {object} request - the user's request
     * @param {object} options - fetch options; reads `cachePath`
     * @returns {Promise<object>} a response, either from the cache or from
     *   the update
     * @throws rethrows the network error when the fetch fails and the policy
     *   says the entry must be revalidated
     */
    async revalidate (request, options) {
      const revalidateRequest = new Request(request, {
        headers: this.policy.revalidationHeaders(request),
      })

      let response
      try {
        // NOTE: be sure to remove the headers property from the
        // user supplied options, since we have already defined
        // them on the new request object. if they're still in the
        // options then those will overwrite the ones from the policy
        response = await remote(revalidateRequest, {
          ...options,
          headers: undefined,
        })
      } catch (err) {
        // if the network fetch fails, return the stale
        // cached response unless it has a cache-control
        // of 'must-revalidate'
        if (!this.policy.mustRevalidate) {
          return this.respond(request.method, options, 'stale')
        }

        throw err
      }

      if (this.policy.revalidated(revalidateRequest, response)) {
        // we got a 304, write a new index to the cache and respond from cache
        const metadata = getMetadata(request, response, options)
        // 304 responses do not include headers that are specific to the response data
        // since they do not include a body, so we copy values for headers that were
        // in the old cache entry to the new one, if the new metadata does not already
        // include that header
        for (const name of KEEP_RESPONSE_HEADERS) {
          if (!hasOwnProperty(metadata.resHeaders, name) && hasOwnProperty(this.entry.metadata.resHeaders, name)) {
            metadata.resHeaders[name] = this.entry.metadata.resHeaders[name]
          }
        }

        try {
          await cacache.index.insert(options.cachePath, this.key, this.entry.integrity, {
            size: this.entry.size,
            metadata,
          })
        } catch (err) {
          // if updating the cache index fails, we ignore it and
          // respond anyway
        }
        return this.respond(request.method, options, 'revalidated')
      }

      // if we got a modified response, create a new entry based on it
      const newEntry = new CacheEntry({
        request,
        response,
        options,
      })

      // respond with the new entry while writing it to the cache
      return newEntry.store('updated')
    }
  }
  412. module.exports = CacheEntry