byline.js 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. // Copyright (C) 2011-2015 John Hewson
  2. //
  3. // Permission is hereby granted, free of charge, to any person obtaining a copy
  4. // of this software and associated documentation files (the "Software"), to
  5. // deal in the Software without restriction, including without limitation the
  6. // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  7. // sell copies of the Software, and to permit persons to whom the Software is
  8. // furnished to do so, subject to the following conditions:
  9. //
  10. // The above copyright notice and this permission notice shall be included in
  11. // all copies or substantial portions of the Software.
  12. //
  13. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  18. // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  19. // IN THE SOFTWARE.
  var stream = require('stream'),
      string_decoder = require('string_decoder'),
      timers = require('timers'),
      util = require('util');
  23. // convenience API
  24. module.exports = function(readStream, options) {
  25. return module.exports.createStream(readStream, options);
  26. };
  27. // basic API
  // Returns a LineStream. When a source stream is supplied it is piped into
  // the new LineStream; otherwise an unpiped LineStream is returned.
  module.exports.createStream = function(readStream, options) {
    if (!readStream) {
      return new LineStream(options);
    }
    return createLineStream(readStream, options);
  };
  35. // deprecated API
  // Legacy entry point: prints a deprecation notice, then delegates to the
  // internal createLineStream() without any options.
  module.exports.createLineStream = function(readStream) {
    var notice = 'WARNING: byline#createLineStream is deprecated and will be removed soon';
    console.log(notice);
    return createLineStream(readStream);
  };
  // Validates the source stream, pipes it into a fresh LineStream and returns
  // that LineStream.
  //
  // Throws an Error when readStream is missing or its `readable` property is
  // falsy.
  function createLineStream(readStream, options) {
    var problem = null;
    if (!readStream) {
      problem = 'expected readStream';
    } else if (!readStream.readable) {
      problem = 'readStream must be readable';
    }
    if (problem !== null) {
      throw new Error(problem);
    }

    var lineStream = new LineStream(options);
    readStream.pipe(lineStream);
    return lineStream;
  }
  51. //
  52. // using the new node v0.10 "streams2" API
  53. //
  54. module.exports.LineStream = LineStream;
  // LineStream is a stream.Transform that splits the text written to it into
  // lines and pushes each line, without its terminator, as a separate chunk.
  //
  // options is forwarded to stream.Transform. One extra option is read here:
  //   keepEmptyLines - when truthy, zero-length lines are pushed as well;
  //                    otherwise they are dropped (the default).
  function LineStream(options) {
    stream.Transform.call(this, options);
    options = options || {};

    // use objectMode to stop the output from being buffered
    // which re-concatenates the lines, just without newlines.
    this._readableState.objectMode = true;
    // completed lines waiting to be pushed; the last entry is the current,
    // possibly still partial, line
    this._lineBuffer = [];
    this._keepEmptyLines = options.keepEmptyLines || false;
    this._lastChunkEndedWithCR = false;
    // Stateful decoder: holds back an incomplete multi-byte UTF-8 sequence at
    // the end of one binary chunk until the next chunk completes it.
    this._utf8Decoder = new string_decoder.StringDecoder('utf8');

    // take the source's encoding if we don't have one
    var self = this;
    this.on('pipe', function(src) {
      // but we can't do this for old-style streams
      if (!self.encoding && src instanceof stream.Readable) {
        self.encoding = src._readableState.encoding;
      }
    });
  }
  util.inherits(LineStream, stream.Transform);

  // Transform hook: decodes the chunk to a string, splits it on line
  // boundaries, joins the first piece onto the buffered partial line, and
  // pushes every line except the last (which may continue in the next chunk).
  LineStream.prototype._transform = function(chunk, encoding, done) {
    // decode binary chunks as UTF-8
    encoding = encoding || 'utf8';
    if (Buffer.isBuffer(chunk)) {
      if (encoding == 'buffer') {
        // decode through the persistent decoder so that a character whose
        // bytes span two chunks is not turned into replacement characters
        chunk = this._utf8Decoder.write(chunk);
        encoding = 'utf8';
      }
      else {
        chunk = chunk.toString(encoding);
      }
    }
    this._chunkEncoding = encoding;

    // A chunk that yields no characters (an empty write, or only the first
    // bytes of a multi-byte character) carries no line information. Skip it
    // so the CR-tracking state and the line buffer are left untouched.
    if (chunk.length === 0) {
      done();
      return;
    }

    // see: http://www.unicode.org/reports/tr18/#Line_Boundaries
    var lines = chunk.split(/\r\n|[\n\v\f\r\x85\u2028\u2029]/g);

    // don't split CRLF which spans chunks: the CR already ended the line, so
    // the empty piece in front of the leading LF is discarded
    if (this._lastChunkEndedWithCR && chunk[0] == '\n') {
      lines.shift();
    }

    // the first piece continues the partial line left by the previous chunk
    if (this._lineBuffer.length > 0) {
      this._lineBuffer[this._lineBuffer.length - 1] += lines.shift();
    }

    this._lastChunkEndedWithCR = chunk[chunk.length - 1] == '\r';
    this._lineBuffer = this._lineBuffer.concat(lines);
    this._pushBuffer(encoding, 1, done);
  };

  // Pushes buffered lines until only `keep` entries remain, then calls done().
  //   encoding - encoding of the buffered strings, handed to _reencode
  //   keep     - number of trailing entries to leave in the buffer
  //   done     - callback invoked once the buffer has been drained to `keep`
  LineStream.prototype._pushBuffer = function(encoding, keep, done) {
    var self = this;
    // always buffer the last (possibly partial) line
    while (this._lineBuffer.length > keep) {
      var line = this._lineBuffer.shift();
      // skip empty lines
      if (!this._keepEmptyLines && line.length === 0) {
        continue;
      }
      if (!this.push(this._reencode(line, encoding))) {
        // when the high-water mark is reached, defer pushes until the next tick
        timers.setImmediate(function() {
          self._pushBuffer(encoding, keep, done);
        });
        return;
      }
    }
    done();
  };

  // Flush hook: emits whatever is still buffered, including the final line.
  LineStream.prototype._flush = function(done) {
    // Bytes of a truncated trailing character are still held by the decoder;
    // end() returns their decoded form, which belongs to the last line.
    var tail = this._utf8Decoder.end();
    if (tail.length > 0) {
      if (this._lineBuffer.length > 0) {
        this._lineBuffer[this._lineBuffer.length - 1] += tail;
      }
      else {
        this._lineBuffer.push(tail);
      }
    }
    this._pushBuffer(this._chunkEncoding, 0, done);
  };

  // see Readable::push
  // Converts a decoded line into the form that is pushed downstream:
  //   - this.encoding set and different from chunkEncoding: a string
  //     re-encoded into this.encoding
  //   - this.encoding set and equal to chunkEncoding: the line unchanged
  //   - this.encoding not set: a Buffer holding the line in chunkEncoding
  LineStream.prototype._reencode = function(line, chunkEncoding) {
    if (this.encoding && this.encoding != chunkEncoding) {
      // Buffer.from replaces the deprecated `new Buffer(string, encoding)`
      return Buffer.from(line, chunkEncoding).toString(this.encoding);
    }
    else if (this.encoding) {
      // this should be the most common case, i.e. we're using an encoded source stream
      return line;
    }
    else {
      return Buffer.from(line, chunkEncoding);
    }
  };
  136. };