parse.js

'use strict'

// this[BUFFER] is the remainder of a chunk if we're waiting for
// the full 512 bytes of a header to come in. We will Buffer.concat()
// it to the next write(), which is a mem copy, but a small one.
//
// this[QUEUE] is a Yallist of entries that haven't been emitted
// yet. This can only get filled up if the user keeps write()ing after
// a write() returns false, or does a write() with more than one entry.
//
// We don't buffer chunks; we always parse them and either create an
// entry, or push it into the active entry. The ReadEntry class knows
// to throw data away if .ignore=true
//
// Shift entry off the buffer when it emits 'end', and emit 'entry' for
// the next one in the list.
//
// At any time, we're pushing body chunks into the entry at WRITEENTRY,
// and waiting for 'end' on the entry at READENTRY.
//
// ignored entries get .resume() called on them straight away

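// A rough usage sketch (illustrative only, not part of this module): the
// Parser duck-types a writable stream, so a tarball can be piped into it
// or fed via write()/end(). The option names shown (filter, onentry) are
// the ones handled in the constructor below; the file name is made up.
//
//   const Parser = require('./parse.js')
//   const fs = require('fs')
//   const parser = new Parser({
//     filter: (path, entry) => entry.type !== 'SymbolicLink',
//     onentry: entry => {
//       console.log(entry.path)
//       entry.resume() // discard the body so parsing can continue
//     },
//   })
//   fs.createReadStream('some-archive.tar').pipe(parser)
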
const warner = require('./warn-mixin.js')
const path = require('path')
const Header = require('./header.js')
const EE = require('events')
const Yallist = require('yallist')
const maxMetaEntrySize = 1024 * 1024
const Entry = require('./read-entry.js')
const Pax = require('./pax.js')
const zlib = require('minizlib')
const Buffer = require('./buffer.js')

const gzipHeader = Buffer.from([0x1f, 0x8b])

const STATE = Symbol('state')
const WRITEENTRY = Symbol('writeEntry')
const READENTRY = Symbol('readEntry')
const NEXTENTRY = Symbol('nextEntry')
const PROCESSENTRY = Symbol('processEntry')
const EX = Symbol('extendedHeader')
const GEX = Symbol('globalExtendedHeader')
const META = Symbol('meta')
const EMITMETA = Symbol('emitMeta')
const BUFFER = Symbol('buffer')
const QUEUE = Symbol('queue')
const ENDED = Symbol('ended')
const EMITTEDEND = Symbol('emittedEnd')
const EMIT = Symbol('emit')
const UNZIP = Symbol('unzip')
const CONSUMECHUNK = Symbol('consumeChunk')
const CONSUMECHUNKSUB = Symbol('consumeChunkSub')
const CONSUMEBODY = Symbol('consumeBody')
const CONSUMEMETA = Symbol('consumeMeta')
const CONSUMEHEADER = Symbol('consumeHeader')
const CONSUMING = Symbol('consuming')
const BUFFERCONCAT = Symbol('bufferConcat')
const MAYBEEND = Symbol('maybeEnd')
const WRITING = Symbol('writing')
const ABORTED = Symbol('aborted')
const DONE = Symbol('onDone')

const noop = _ => true

module.exports = warner(class Parser extends EE {
  constructor (opt) {
    opt = opt || {}
    super(opt)

    if (opt.ondone)
      this.on(DONE, opt.ondone)
    else
      this.on(DONE, _ => {
        this.emit('prefinish')
        this.emit('finish')
        this.emit('end')
        this.emit('close')
      })

    this.strict = !!opt.strict
    this.maxMetaEntrySize = opt.maxMetaEntrySize || maxMetaEntrySize
    this.filter = typeof opt.filter === 'function' ? opt.filter : noop

    // have to set this so that streams are ok piping into it
    this.writable = true
    this.readable = false

    this[QUEUE] = new Yallist()
    this[BUFFER] = null
    this[READENTRY] = null
    this[WRITEENTRY] = null
    this[STATE] = 'begin'
    this[META] = ''
    this[EX] = null
    this[GEX] = null
    this[ENDED] = false
    this[UNZIP] = null
    this[ABORTED] = false

    if (typeof opt.onwarn === 'function')
      this.on('warn', opt.onwarn)
    if (typeof opt.onentry === 'function')
      this.on('entry', opt.onentry)
  }

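  // Parse one 512-byte header block at `position`, warn on invalid headers,
  // and set up the next WRITEENTRY plus the parser state for its body.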
  [CONSUMEHEADER] (chunk, position) {
    let header
    try {
      header = new Header(chunk, position, this[EX], this[GEX])
    } catch (er) {
      return this.warn('invalid entry', er)
    }

    if (header.nullBlock)
      this[EMIT]('nullBlock')
    else if (!header.cksumValid)
      this.warn('invalid entry', header)
    else if (!header.path)
      this.warn('invalid: path is required', header)
    else {
      const type = header.type
      if (/^(Symbolic)?Link$/.test(type) && !header.linkpath)
        this.warn('invalid: linkpath required', header)
      else if (!/^(Symbolic)?Link$/.test(type) && header.linkpath)
        this.warn('invalid: linkpath forbidden', header)
      else {
        const entry = this[WRITEENTRY] = new Entry(header, this[EX], this[GEX])

        if (entry.meta) {
          if (entry.size > this.maxMetaEntrySize) {
            entry.ignore = true
            this[EMIT]('ignoredEntry', entry)
            this[STATE] = 'ignore'
          } else if (entry.size > 0) {
            this[META] = ''
            entry.on('data', c => this[META] += c)
            this[STATE] = 'meta'
          }
        } else {
          this[EX] = null
          entry.ignore = entry.ignore || !this.filter(entry.path, entry)
          if (entry.ignore) {
            this[EMIT]('ignoredEntry', entry)
            this[STATE] = entry.remain ? 'ignore' : 'begin'
          } else {
            if (entry.remain)
              this[STATE] = 'body'
            else {
              this[STATE] = 'begin'
              entry.end()
            }

            if (!this[READENTRY]) {
              this[QUEUE].push(entry)
              this[NEXTENTRY]()
            } else
              this[QUEUE].push(entry)
          }
        }
      }
    }
  }

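  // Handle one item shifted off the queue: null clears READENTRY, an array
  // is a queued [event, data, extra] triple, anything else is an entry to
  // emit. Returns true only if the next queued item can be processed now.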
  [PROCESSENTRY] (entry) {
    let go = true

    if (!entry) {
      this[READENTRY] = null
      go = false
    } else if (Array.isArray(entry))
      this.emit.apply(this, entry)
    else {
      this[READENTRY] = entry
      this.emit('entry', entry)
      if (!entry.emittedEnd) {
        entry.on('end', _ => this[NEXTENTRY]())
        go = false
      }
    }

    return go
  }

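  // Drain the queue of pending entries and events, then decide whether it's
  // safe to ask upstream for more data by emitting 'drain'.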
  [NEXTENTRY] () {
    do {} while (this[PROCESSENTRY](this[QUEUE].shift()))

    if (!this[QUEUE].length) {
      // At this point, there's nothing in the queue, but we may have an
      // entry which is being consumed (readEntry).
      // If we don't, then we definitely can handle more data.
      // If we do, and either it's flowing, or it has never had any data
      // written to it, then it needs more.
      // The only other possibility is that it has returned false from a
      // write() call, so we wait for the next drain to continue.
      const re = this[READENTRY]
      const drainNow = !re || re.flowing || re.size === re.remain
      if (drainNow) {
        if (!this[WRITING])
          this.emit('drain')
      } else
        re.once('drain', _ => this.emit('drain'))
    }
  }

  [CONSUMEBODY] (chunk, position) {
    // write up to but no more than writeEntry.blockRemain
    const entry = this[WRITEENTRY]
    const br = entry.blockRemain
    const c = (br >= chunk.length && position === 0) ? chunk
      : chunk.slice(position, position + br)

    entry.write(c)

    if (!entry.blockRemain) {
      this[STATE] = 'begin'
      this[WRITEENTRY] = null
      entry.end()
    }

    return c.length
  }

  [CONSUMEMETA] (chunk, position) {
    const entry = this[WRITEENTRY]
    const ret = this[CONSUMEBODY](chunk, position)

    // if we finished, then the entry is reset
    if (!this[WRITEENTRY])
      this[EMITMETA](entry)

    return ret
  }

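  // Emit an event immediately if nothing is queued or being read; otherwise
  // queue it so it stays ordered relative to pending entries.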
  [EMIT] (ev, data, extra) {
    if (!this[QUEUE].length && !this[READENTRY])
      this.emit(ev, data, extra)
    else
      this[QUEUE].push([ev, data, extra])
  }

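  // Apply a finished meta entry's body to the extended (or global extended)
  // header state for upcoming entries. Pax bodies are "<len> key=value\n"
  // records, e.g. "20 atime=1593414101\n" (illustrative value).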
  [EMITMETA] (entry) {
    this[EMIT]('meta', this[META])
    switch (entry.type) {
      case 'ExtendedHeader':
      case 'OldExtendedHeader':
        this[EX] = Pax.parse(this[META], this[EX], false)
        break

      case 'GlobalExtendedHeader':
        this[GEX] = Pax.parse(this[META], this[GEX], true)
        break

      case 'NextFileHasLongPath':
      case 'OldGnuLongPath':
        this[EX] = this[EX] || Object.create(null)
        this[EX].path = this[META].replace(/\0.*/, '')
        break

      case 'NextFileHasLongLinkpath':
        this[EX] = this[EX] || Object.create(null)
        this[EX].linkpath = this[META].replace(/\0.*/, '')
        break

      /* istanbul ignore next */
      default: throw new Error('unknown meta: ' + entry.type)
    }
  }

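  // Unrecoverable failure: mark the parser as aborted and surface the error
  // via 'warn', 'abort', and 'error'.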
  abort (msg, error) {
    this[ABORTED] = true
    this.warn(msg, error)
    this.emit('abort', error)
    this.emit('error', error)
  }

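  // Accept a chunk from upstream. On the first write, sniff for the gzip
  // magic bytes (0x1f 0x8b); if present, route all input through a minizlib
  // Unzip stream before consuming it as tar blocks. Returns false when the
  // caller should wait for 'drain' before writing more.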
  write (chunk) {
    if (this[ABORTED])
      return

    // first write, might be gzipped
    if (this[UNZIP] === null && chunk) {
      if (this[BUFFER]) {
        chunk = Buffer.concat([this[BUFFER], chunk])
        this[BUFFER] = null
      }
      if (chunk.length < gzipHeader.length) {
        this[BUFFER] = chunk
        return true
      }
      for (let i = 0; this[UNZIP] === null && i < gzipHeader.length; i++) {
        if (chunk[i] !== gzipHeader[i])
          this[UNZIP] = false
      }
      if (this[UNZIP] === null) {
        const ended = this[ENDED]
        this[ENDED] = false
        this[UNZIP] = new zlib.Unzip()
        this[UNZIP].on('data', chunk => this[CONSUMECHUNK](chunk))
        this[UNZIP].on('error', er => this.abort(er.message, er))
        this[UNZIP].on('end', _ => {
          this[ENDED] = true
          this[CONSUMECHUNK]()
        })
        this[WRITING] = true
        const ret = this[UNZIP][ended ? 'end' : 'write'](chunk)
        this[WRITING] = false
        return ret
      }
    }

    this[WRITING] = true
    if (this[UNZIP])
      this[UNZIP].write(chunk)
    else
      this[CONSUMECHUNK](chunk)
    this[WRITING] = false

    // return false if there's a queue, or if the current entry isn't flowing
    const ret =
      this[QUEUE].length ? false :
      this[READENTRY] ? this[READENTRY].flowing :
      true

    // if we have no queue, then that means a clogged READENTRY
    if (!ret && !this[QUEUE].length)
      this[READENTRY].once('drain', _ => this.emit('drain'))

    return ret
  }

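  // Append a chunk to the held-over buffer (unless we've aborted).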
  [BUFFERCONCAT] (c) {
    if (c && !this[ABORTED])
      this[BUFFER] = this[BUFFER] ? Buffer.concat([this[BUFFER], c]) : c
  }

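  // Emit DONE exactly once, after input has ended and consumption has
  // settled; warn if the final entry was truncated mid-body.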
  [MAYBEEND] () {
    if (this[ENDED] &&
        !this[EMITTEDEND] &&
        !this[ABORTED] &&
        !this[CONSUMING]) {
      this[EMITTEDEND] = true
      const entry = this[WRITEENTRY]
      if (entry && entry.blockRemain) {
        const have = this[BUFFER] ? this[BUFFER].length : 0
        this.warn('Truncated input (needed ' + entry.blockRemain +
          ' more bytes, only ' + have + ' available)', entry)
        if (this[BUFFER])
          entry.write(this[BUFFER])
        entry.end()
      }
      this[EMIT](DONE)
    }
  }

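  // Top-level consumption: merge with any buffered remainder and feed it to
  // CONSUMECHUNKSUB. Re-entrant calls while already consuming just buffer
  // the chunk; the end check runs once everything has settled.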
  [CONSUMECHUNK] (chunk) {
    if (this[CONSUMING]) {
      this[BUFFERCONCAT](chunk)
    } else if (!chunk && !this[BUFFER]) {
      this[MAYBEEND]()
    } else {
      this[CONSUMING] = true
      if (this[BUFFER]) {
        this[BUFFERCONCAT](chunk)
        const c = this[BUFFER]
        this[BUFFER] = null
        this[CONSUMECHUNKSUB](c)
      } else {
        this[CONSUMECHUNKSUB](chunk)
      }

      while (this[BUFFER] && this[BUFFER].length >= 512 && !this[ABORTED]) {
        const c = this[BUFFER]
        this[BUFFER] = null
        this[CONSUMECHUNKSUB](c)
      }

      this[CONSUMING] = false
    }

    if (!this[BUFFER] || this[ENDED])
      this[MAYBEEND]()
  }

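  // Walk the chunk in 512-byte tar blocks, dispatching on the current state
  // (header, body, ignore, or meta) until fewer than 512 bytes remain.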
  [CONSUMECHUNKSUB] (chunk) {
    // we know that we are in CONSUMING mode, so anything written goes into
    // the buffer. Advance the position and put any remainder in the buffer.
    let position = 0
    let length = chunk.length
    while (position + 512 <= length && !this[ABORTED]) {
      switch (this[STATE]) {
        case 'begin':
          this[CONSUMEHEADER](chunk, position)
          position += 512
          break

        case 'ignore':
        case 'body':
          position += this[CONSUMEBODY](chunk, position)
          break

        case 'meta':
          position += this[CONSUMEMETA](chunk, position)
          break

        /* istanbul ignore next */
        default:
          throw new Error('invalid state: ' + this[STATE])
      }
    }

    if (position < length) {
      if (this[BUFFER])
        this[BUFFER] = Buffer.concat([chunk.slice(position), this[BUFFER]])
      else
        this[BUFFER] = chunk.slice(position)
    }
  }

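  // Signal end of input, flushing through the Unzip stream if one is active.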
  end (chunk) {
    if (!this[ABORTED]) {
      if (this[UNZIP])
        this[UNZIP].end(chunk)
      else {
        this[ENDED] = true
        this.write(chunk)
      }
    }
  }
})