// index.js (5.5 KB)

  1. var stream = require('stream')
  2. var inherits = require('inherits')
  3. var genobj = require('generate-object-property')
  4. var genfun = require('generate-function')
  5. var bufferFrom = require('buffer-from')
  6. var bufferAlloc = require('buffer-alloc')
  // Byte values of the single characters the parser compares against while
  // scanning raw buffers (default quote, default separator, CR and LF).
  var quote = bufferFrom('"').readUInt8(0)
  var comma = bufferFrom(',').readUInt8(0)
  var cr = bufferFrom('\r').readUInt8(0)
  var nl = bufferFrom('\n').readUInt8(0)
  /**
   * Object-mode Transform stream that turns CSV bytes into row objects.
   *
   * `opts` may be omitted, an array of header names, or an options object:
   *   - separator, quote, escape, newline: strings; only the first byte of each
   *     is used. `escape` falls back to the quote byte.
   *   - headers: array of column names; when given, the first line is treated
   *     as data and the row constructor is compiled immediately.
   *   - strict: when truthy, rows whose cell count differs from the header
   *     count are reported as errors.
   *   - mapHeaders / mapValues: optional mapping functions (default: identity).
   *   - raw: when truthy, cells are kept as Buffers instead of strings.
   */
  var Parser = function (opts) {
    var options = Array.isArray(opts) ? {headers: opts} : (opts || {})

    stream.Transform.call(this, {objectMode: true, highWaterMark: 16})

    // Delimiter bytes.
    this.separator = options.separator ? bufferFrom(options.separator)[0] : comma
    this.quote = options.quote ? bufferFrom(options.quote)[0] : quote
    this.escape = options.escape ? bufferFrom(options.escape)[0] : this.quote

    // An explicit newline disables the LF / CR auto-detection done on the first line.
    var hasCustomNewline = !!options.newline
    this.newline = hasCustomNewline ? bufferFrom(options.newline)[0] : nl
    this.customNewline = hasCustomNewline

    this.headers = options.headers || null
    this.strict = options.strict || null
    this.mapHeaders = options.mapHeaders || identity
    this.mapValues = options.mapValues || identity

    // Scanner state carried between chunks.
    this._raw = !!options.raw
    this._prev = null
    this._prevEnd = 0
    this._first = !this.headers // the first line is a header row only if none were supplied
    this._quoted = false
    this._escaped = false
    this._empty = this._raw ? bufferAlloc(0) : ''
    this._Row = null

    if (this.headers) this._compile(this.headers)
  }

  inherits(Parser, stream.Transform)
  /**
   * Transform hook: scans a chunk byte by byte, tracks quote/escape state and
   * hands every complete line to `_online`. Bytes of an unfinished trailing
   * line are kept in `_prev` and prepended to the next chunk.
   *
   * @param {Buffer|string} data incoming chunk
   * @param {string} enc unused
   * @param {Function} cb called once the chunk has been scanned
   */
  Parser.prototype._transform = function (data, enc, cb) {
    var chunk = typeof data === 'string' ? bufferFrom(data) : data
    var carried = this._prev
    var scanFrom = 0
    var buf = chunk

    if (carried) {
      // Resume after the bytes that were already scanned last time.
      scanFrom = carried.length
      buf = Buffer.concat([carried, chunk])
      this._prev = null
    }

    var total = buf.length

    for (var pos = scanFrom; pos < total; pos++) {
      var current = buf[pos]
      var following = pos + 1 < total ? buf[pos + 1] : null

      var opensEscape = !this._escaped &&
        current === this.escape &&
        following === this.quote &&
        pos !== scanFrom
      if (opensEscape) {
        this._escaped = true
        continue
      }

      if (current === this.quote) {
        if (this._escaped) {
          this._escaped = false
        } else {
          // non-escaped quote (quoting the cell)
          this._quoted = !this._quoted
        }
        continue
      }

      // Newlines inside a quoted cell belong to the cell.
      if (this._quoted) continue

      // Until the first line has been read, detect LF vs. bare CR line endings.
      if (this._first && !this.customNewline) {
        if (current === nl) {
          this.newline = nl
        } else if (current === cr && following !== nl) {
          this.newline = cr
        }
      }

      if (current !== this.newline) continue

      this._online(buf, this._prevEnd, pos + 1)
      this._prevEnd = pos + 1
    }

    var leftover = total - this._prevEnd

    // Everything was consumed: nothing to carry over.
    if (leftover === 0) {
      this._prevEnd = 0
      return cb()
    }

    // The unfinished line starts inside the new chunk: keep only that chunk and
    // rebase the line start onto it.
    if (leftover < chunk.length) {
      this._prev = chunk
      this._prevEnd -= (total - chunk.length)
      return cb()
    }

    // The unfinished line started before this chunk: keep the whole buffer.
    this._prev = buf
    cb()
  }
  /**
   * Flush hook: if an unterminated last line is still buffered (and the scanner
   * is not in the middle of an escape), process it as a final line.
   *
   * @param {Function} cb called when flushing is done
   */
  Parser.prototype._flush = function (cb) {
    var tail = this._prev
    var nothingToFlush = this._escaped || !tail

    if (!nothingToFlush) {
      // `_online` drops one trailing byte as the newline, so pass length + 1.
      this._online(tail, this._prevEnd, tail.length + 1)
      cb()
      return
    }

    return cb()
  }
  /**
   * Splits one line of `buf` into cells, then either records them as the
   * header row (first line) or emits them as a data row.
   *
   * @param {Buffer} buf   buffer containing the line
   * @param {number} start index of the first byte of the line
   * @param {number} end   index one past the line terminator (`_transform`
   *                       passes `newlineIndex + 1`, `_flush` passes
   *                       `length + 1`)
   */
  Parser.prototype._online = function (buf, start, end) {
    end-- // trim newline

    // Trim the CR of a CRLF pair, but never step back past `start`: on an empty
    // line `buf[end - 1]` belongs to the previous line (e.g. its bare-CR
    // terminator), and trimming it would make `end` point before `start`.
    if (!this.customNewline && end > start && buf[end - 1] === cr) end--

    var separator = this.separator
    var cells = []
    var isQuoted = false
    var offset = start

    for (var i = start; i < end; i++) {
      var isStartingQuote = !isQuoted && buf[i] === this.quote
      var isEndingQuote = isQuoted && buf[i] === this.quote && i + 1 <= end && buf[i + 1] === separator
      var isEscape = isQuoted && buf[i] === this.escape && i + 1 < end && buf[i + 1] === this.quote

      if (isStartingQuote || isEndingQuote) {
        isQuoted = !isQuoted
        continue
      } else if (isEscape) {
        i++ // skip the escaped quote so it is not read as a closing quote
        continue
      }

      if (buf[i] === separator && !isQuoted) {
        cells.push(this._oncell(buf, offset, i))
        offset = i + 1
      }
    }

    // Last cell of the line (no separator follows it).
    if (offset < end) cells.push(this._oncell(buf, offset, end))
    // A trailing separator means one more, empty, cell. Only look at bytes that
    // belong to this line.
    if (end > start && buf[end - 1] === separator) cells.push(this._empty)

    if (this._first) {
      // First line with no headers supplied: it defines the column names.
      this._first = false
      this.headers = cells
      this._compile(cells)
      this.emit('headers', this.headers)
      return
    }

    if (this.strict && cells.length !== this.headers.length) {
      this.emit('error', new Error('Row length does not match headers'))
    } else {
      this._emit(this._Row, cells)
    }
  }
  /**
   * Builds, once, the `Row` constructor used for emitted rows: a generated
   * function that copies `cells[i]` onto the property named by the mapped
   * header `i`. Headers for which `mapHeaders` returns a falsy value are
   * skipped. The header list is exposed as a non-enumerable `headers` property
   * on the Row prototype.
   */
  Parser.prototype._compile = function () {
    // Already compiled: keep the existing constructor.
    if (this._Row) return

    var rowSource = genfun()('function Row (cells) {')

    this.headers.forEach(function (name, index) {
      var mapped = this.mapHeaders(name, index)
      if (!mapped) return
      rowSource('%s = cells[%d]', genobj('this', mapped), index)
    }, this)

    rowSource('}')

    var RowCtor = rowSource.toFunction()
    this._Row = RowCtor

    // Fallback for engines without Object.defineProperty.
    if (!Object.defineProperty) {
      RowCtor.prototype.headers = this.headers
      return
    }

    Object.defineProperty(RowCtor.prototype, 'headers', {
      value: this.headers,
      enumerable: false
    })
  }
  /**
   * Wraps the parsed cells in a `Row` instance and pushes it downstream.
   *
   * @param {Function} Row constructor taking the cell array
   * @param {Array} cells parsed cell values of one line
   */
  Parser.prototype._emit = function (Row, cells) {
    var record = new Row(cells)
    this.push(record)
  }
  /**
   * Extracts one cell from `buf[start, end)`: strips a surrounding pair of
   * quotes, collapses escape+quote pairs to a single quote (compacting the
   * bytes in place inside `buf`), converts the result via `_onvalue` and,
   * except on the header line, runs it through `mapValues`.
   *
   * @param {Buffer} buf   buffer containing the cell (modified in place)
   * @param {number} start index of the first byte of the cell
   * @param {number} end   index one past the last byte of the cell
   * @returns {*} the cell value
   */
  Parser.prototype._oncell = function (buf, start, end) {
    var quoteByte = this.quote
    var from = start
    var to = end

    // remove quotes from quoted cells
    if (buf[from] === quoteByte && buf[to - 1] === quoteByte) {
      from += 1
      to -= 1
    }

    var read = from
    var write = from
    while (read < to) {
      // An escape byte followed by a quote: skip the escape, keep the quote.
      var isEscapePair = buf[read] === this.escape && read + 1 < to && buf[read + 1] === quoteByte
      if (isEscapePair) read += 1
      if (write !== read) buf[write] = buf[read]
      write += 1
      read += 1
    }

    var value = this._onvalue(buf, from, write)
    if (this._first) return value
    return this.mapValues(value)
  }
  /**
   * Converts the byte range `buf[start, end)` into a cell value: a Buffer slice
   * in raw mode, otherwise a UTF-8 decoded string.
   *
   * @param {Buffer} buf
   * @param {number} start
   * @param {number} end
   * @returns {Buffer|string}
   */
  Parser.prototype._onvalue = function (buf, start, end) {
    return this._raw
      ? buf.slice(start, end)
      : buf.toString('utf-8', start, end)
  }
  /**
   * Default mapper for headers and values: hands back its argument untouched.
   */
  function identity (id) {
    var unchanged = id
    return unchanged
  }
  185. module.exports = function (opts) {
  186. return new Parser(opts)
  187. }