index.js 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605
  1. /*
  2. seek-bzip - a pure-javascript module for seeking within bzip2 data
  3. Copyright (C) 2013 C. Scott Ananian
  4. Copyright (C) 2012 Eli Skeggs
  5. Copyright (C) 2011 Kevin Kwok
  6. Permission is hereby granted, free of charge, to any person obtaining
  7. a copy of this software and associated documentation files (the
  8. "Software"), to deal in the Software without restriction, including
  9. without limitation the rights to use, copy, modify, merge, publish,
  10. distribute, sublicense, and/or sell copies of the Software, and to
  11. permit persons to whom the Software is furnished to do so, subject to
  12. the following conditions:
  13. The above copyright notice and this permission notice shall be
  14. included in all copies or substantial portions of the Software.
  15. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  16. EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  17. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  18. NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  19. LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  20. OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  21. WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  22. Adapted from node-bzip, copyright 2012 Eli Skeggs.
  23. Adapted from bzip2.js, copyright 2011 Kevin Kwok (antimatter15@gmail.com).
  24. Based on micro-bunzip by Rob Landley (rob@landley.net).
  25. Based on bzip2 decompression code by Julian R Seward (jseward@acm.org),
  26. which also acknowledges contributions by Mike Burrows, David Wheeler,
  27. Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten,
  28. Robert Sedgewick, and Jon L. Bentley.
  29. */
  30. var BitReader = require('./bitreader');
  31. var Stream = require('./stream');
  32. var CRC32 = require('./crc32');
  33. var pjson = require('../package.json');
  /* Limits and magic values consulted by the block decoder below. */
  var MAX_HUFCODE_BITS = 20,      // longest huffman code length accepted
      MAX_SYMBOLS = 258,          // size of each group's permute[] table
      SYMBOL_RUNA = 0,            // run symbol contributing 1 * runPos
      SYMBOL_RUNB = 1,            // run symbol contributing 2 * runPos
      MIN_GROUPS = 2,             // fewest huffman groups a block may declare
      MAX_GROUPS = 6,             // most huffman groups a block may declare
      GROUP_SIZE = 50,            // symbols decoded before the next selector is used
      WHOLEPI = "314159265359",   // value of reader.pi() at the start of a block
      SQRTPI = "177245385090";    // value of reader.pi() at the end of a stream
  /**
   * Move-to-front step: take the element at `index`, slide every element
   * before it one slot towards the end, and put the taken element in slot 0.
   * Mutates `array` in place.
   *
   * @param {Array|Buffer|TypedArray} array - indexable container to reorder.
   * @param {number} index - position of the element to bring to the front.
   * @returns {*} the element that was moved.
   */
  var mtf = function(array, index) {
    var picked = array[index];
    var slot = index;
    while (slot > 0) {
      array[slot] = array[slot - 1];
      slot--;
    }
    array[0] = picked;
    return picked;
  };
  /* Numeric status codes; also published to callers as Bunzip.Err. */
  var Err = {
    OK:                     0,
    LAST_BLOCK:            -1,
    NOT_BZIP_DATA:         -2,
    UNEXPECTED_INPUT_EOF:  -3,
    UNEXPECTED_OUTPUT_EOF: -4,
    DATA_ERROR:            -5,
    OUT_OF_MEMORY:         -6,
    OBSOLETE_INPUT:        -7,
    END_OF_BLOCK:          -8
  };

  /* Message text looked up by status code.  OK and END_OF_BLOCK have no entry. */
  var ErrorMessages = (function() {
    var table = {};
    [
      [Err.LAST_BLOCK, "Bad file checksum"],
      [Err.NOT_BZIP_DATA, "Not bzip data"],
      [Err.UNEXPECTED_INPUT_EOF, "Unexpected input EOF"],
      [Err.UNEXPECTED_OUTPUT_EOF, "Unexpected output EOF"],
      [Err.DATA_ERROR, "Data error"],
      [Err.OUT_OF_MEMORY, "Out of memory"],
      [Err.OBSOLETE_INPUT, "Obsolete (pre 0.9.5) bzip format not supported."]
    ].forEach(function(entry) {
      table[entry[0]] = entry[1];
    });
    return table;
  })();
  /**
   * Raise a TypeError describing a decoder failure.
   *
   * @param {number} status - status code; used to look up the message in
   *   ErrorMessages and stored on the error as `errorCode`.
   * @param {string} [optDetail] - when truthy, appended to the message after ': '.
   * @throws {TypeError} always.
   */
  var _throw = function(status, optDetail) {
    var text = ErrorMessages[status] || 'unknown error';
    var failure = new TypeError(optDetail ? text + ': ' + optDetail : text);
    failure.errorCode = status;
    throw failure;
  };
  /**
   * Decoder state for one bzip2 input.  Zeroes the write-cursor fields and then
   * passes both streams to _start_bunzip.
   *
   * @param {Object} inputStream - source stream, forwarded to _start_bunzip.
   * @param {Object} outputStream - destination stream, forwarded to _start_bunzip.
   */
  var Bunzip = function(inputStream, outputStream) {
    this.writeCount = 0;
    this.writeCurrent = 0;
    this.writePos = 0;
    this._start_bunzip(inputStream, outputStream);
  };
  /**
   * Decode the next block via _get_next_block and, when one was found, install
   * a fresh CRC32 accumulator in this.blockCRC for the write-out pass.
   *
   * @returns {boolean} true when a block is ready; false when there are no
   *   more blocks, in which case this.writeCount is set to -1.
   */
  Bunzip.prototype._init_block = function() {
    if (this._get_next_block()) {
      this.blockCRC = new CRC32();
      return true;
    }
    /* no more blocks */
    this.writeCount = -1;
    return false;
  };
  /* XXX micro-bunzip uses (inputStream, inputBuffer, len) as arguments */
  /**
   * Validate the 4-byte stream header ("BZh" followed by an ASCII level digit
   * '1'-'9') and (re)initialise the per-stream decoder state: a new BitReader
   * over `inputStream`, the intermediate buffer size, the output byte counter
   * and the running stream CRC.
   *
   * @param {Object} inputStream - must provide read(buffer, offset, length)
   *   returning the number of bytes read.
   * @param {Object} outputStream - stored as this.outputStream for later writes.
   * @throws {TypeError} with errorCode Err.NOT_BZIP_DATA when the magic bytes
   *   or the level digit are wrong, or fewer than 4 bytes could be read.
   */
  Bunzip.prototype._start_bunzip = function(inputStream, outputStream) {
    /* Ensure that file starts with "BZh['1'-'9']." */
    /* `new Buffer(size)` is deprecated; prefer Buffer.alloc where it exists. */
    var buf = (typeof Buffer.alloc === 'function') ? Buffer.alloc(4) : new Buffer(4);
    if (inputStream.read(buf, 0, 4) !== 4 ||
        String.fromCharCode(buf[0], buf[1], buf[2]) !== 'BZh') {
      _throw(Err.NOT_BZIP_DATA, 'bad magic');
    }
    var level = buf[3] - 0x30;
    if (level < 1 || level > 9) {
      _throw(Err.NOT_BZIP_DATA, 'level out of range');
    }
    this.reader = new BitReader(inputStream);
    /* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of
       uncompressed data.  The intermediate buffer itself is allocated per
       block, using this size. */
    this.dbufSize = 100000 * level;
    this.nextoutput = 0;
    this.outputStream = outputStream;
    this.streamCRC = 0;
  };
  /**
   * Read one block from this.reader and decode it into the intermediate
   * buffer this.dbuf, leaving the decoder ready for _read_bunzip.
   * (Port of get_next_block() from micro-bunzip.)
   *
   * On success this sets targetBlockCRC, streamCRC, dbuf, writePos,
   * writeCurrent, writeCount and writeRun.
   *
   * @returns {boolean} true when a block was decoded; false when the
   *   end-of-stream signature was read instead (the 32-bit stream CRC that
   *   follows it is left for the caller to read).
   * @throws {TypeError} via _throw, with errorCode NOT_BZIP_DATA for an
   *   unknown signature, OBSOLETE_INPUT when the randomised bit is set, and
   *   DATA_ERROR for any inconsistency in the block contents.
   */
  Bunzip.prototype._get_next_block = function() {
    var i, j, k;
    var reader = this.reader;
    /* Read in header signature and CRC, then validate signature.
       (last block signature means CRC is for whole file, return now) */
    var h = reader.pi();
    if (h === SQRTPI) { // last block
      return false; /* no more blocks */
    }
    if (h !== WHOLEPI) {
      _throw(Err.NOT_BZIP_DATA);
    }
    this.targetBlockCRC = reader.read(32) >>> 0; // (convert to unsigned)
    /* Fold this block's CRC into the stream CRC: rotate left by one, xor. */
    this.streamCRC = (this.targetBlockCRC ^
                      ((this.streamCRC << 1) | (this.streamCRC >>> 31))) >>> 0;
    /* We can add support for blockRandomised if anybody complains.  There was
       some code for this in busybox 1.0.0-pre3, but nobody ever noticed that
       it didn't actually work. */
    if (reader.read(1)) {
      _throw(Err.OBSOLETE_INPUT);
    }
    var origPointer = reader.read(24);
    if (origPointer > this.dbufSize) {
      _throw(Err.DATA_ERROR, 'initial position out of bounds');
    }
    /* mapping table: if some byte values are never used (encoding things
       like ascii text), the compression code removes the gaps to have fewer
       symbols to deal with, and writes a sparse bitfield indicating which
       values were present.  We make a translation table to convert the symbols
       back to the corresponding bytes. */
    var t = reader.read(16);
    /* Scratch tables are only ever indexed, so zero-initialised Uint8Arrays
       replace the deprecated (and formerly uninitialised) `new Buffer(n)`. */
    var symToByte = new Uint8Array(256), symTotal = 0;
    for (i = 0; i < 16; i++) {
      if (t & (1 << (0xF - i))) {
        var o = i * 16;
        k = reader.read(16);
        for (j = 0; j < 16; j++) {
          if (k & (1 << (0xF - j))) {
            symToByte[symTotal++] = o + j;
          }
        }
      }
    }

    /* How many different huffman coding groups does this block use? */
    var groupCount = reader.read(3);
    if (groupCount < MIN_GROUPS || groupCount > MAX_GROUPS) {
      _throw(Err.DATA_ERROR);
    }
    /* nSelectors: Every GROUP_SIZE many symbols we select a new huffman coding
       group.  Read in the group selector list, which is stored as MTF encoded
       bit runs.  (MTF=Move To Front, as each value is used it's moved to the
       start of the list.) */
    var nSelectors = reader.read(15);
    if (nSelectors === 0) {
      _throw(Err.DATA_ERROR);
    }

    var mtfSymbol = new Uint8Array(256);
    for (i = 0; i < groupCount; i++) {
      mtfSymbol[i] = i;
    }

    var selectors = new Uint8Array(nSelectors); // was 32768...

    for (i = 0; i < nSelectors; i++) {
      /* Get next value: a unary count of 1-bits terminated by a 0-bit.  The
         count indexes mtfSymbol, whose only valid slots are
         0..groupCount-1, so it must be checked after each increment;
         checking before the increment let a count equal to groupCount
         through. */
      j = 0;
      while (reader.read(1)) {
        j++;
        if (j >= groupCount) {
          _throw(Err.DATA_ERROR);
        }
      }
      /* Decode MTF to get the next selector */
      selectors[i] = mtf(mtfSymbol, j);
    }

    /* Read the huffman coding tables for each group, which code for symTotal
       literal symbols, plus two run symbols (RUNA, RUNB) */
    var symCount = symTotal + 2;
    var groups = [], hufGroup;
    for (j = 0; j < groupCount; j++) {
      var length = new Uint8Array(symCount), temp = new Uint16Array(MAX_HUFCODE_BITS + 1);
      /* Read huffman code lengths for each symbol.  They're stored in
         a way similar to mtf; record a starting value for the first symbol,
         and an offset from the previous value for every symbol after that. */
      t = reader.read(5); // lengths
      for (i = 0; i < symCount; i++) {
        for (;;) {
          if (t < 1 || t > MAX_HUFCODE_BITS) {
            _throw(Err.DATA_ERROR);
          }
          /* If first bit is 0, stop.  Else second bit indicates whether
             to increment or decrement the value. */
          if (!reader.read(1)) {
            break;
          }
          if (!reader.read(1)) {
            t++;
          } else {
            t--;
          }
        }
        length[i] = t;
      }

      /* Find largest and smallest lengths in this group */
      var minLen, maxLen;
      minLen = maxLen = length[0];
      for (i = 1; i < symCount; i++) {
        if (length[i] > maxLen) {
          maxLen = length[i];
        } else if (length[i] < minLen) {
          minLen = length[i];
        }
      }

      /* Calculate permute[], base[], and limit[] tables from length[].
       *
       * permute[] is the lookup table for converting huffman coded symbols
       * into decoded symbols.  base[] is the amount to subtract from the
       * value of a huffman symbol of a given length when using permute[].
       *
       * limit[] indicates the largest numerical value a symbol with a given
       * number of bits can have.  This is how the huffman codes can vary in
       * length: each code with a value>limit[length] needs another bit.
       */
      hufGroup = {};
      groups.push(hufGroup);
      hufGroup.permute = new Uint16Array(MAX_SYMBOLS);
      hufGroup.limit = new Uint32Array(MAX_HUFCODE_BITS + 2);
      hufGroup.base = new Uint32Array(MAX_HUFCODE_BITS + 1);
      hufGroup.minLen = minLen;
      hufGroup.maxLen = maxLen;
      /* Calculate permute[].  Concurrently, initialize temp[] and limit[]. */
      var pp = 0;
      for (i = minLen; i <= maxLen; i++) {
        temp[i] = hufGroup.limit[i] = 0;
        for (t = 0; t < symCount; t++) {
          if (length[t] === i) {
            hufGroup.permute[pp++] = t;
          }
        }
      }
      /* Count symbols coded for at each bit length */
      for (i = 0; i < symCount; i++) {
        temp[length[i]]++;
      }
      /* Calculate limit[] (the largest symbol-coding value at each bit
       * length, which is (previous limit<<1)+symbols at this level), and
       * base[] (number of symbols to ignore at each bit length, which is
       * limit minus the cumulative count of symbols coded for already). */
      pp = t = 0;
      for (i = minLen; i < maxLen; i++) {
        pp += temp[i];
        hufGroup.limit[i] = pp - 1;
        pp <<= 1;
        t += temp[i];
        hufGroup.base[i + 1] = pp - t;
      }
      /* Sentinel slot past maxLen.  The symbol-decoding loop below rejects
         i > maxLen before it would compare against this entry. */
      hufGroup.limit[maxLen + 1] = Number.MAX_VALUE;
      hufGroup.limit[maxLen] = pp + temp[maxLen] - 1;
      hufGroup.base[minLen] = 0;
    }

    /* We've finished reading and digesting the block header.  Now read this
       block's huffman coded symbols from the file and undo the huffman coding
       and run length encoding, saving the result into dbuf[dbufCount++]=uc */

    /* Initialize symbol occurrence counters and symbol Move To Front table */
    var byteCount = new Uint32Array(256);
    for (i = 0; i < 256; i++) {
      mtfSymbol[i] = i;
    }
    /* Loop through compressed symbols. */
    var runPos = 0, dbufCount = 0, selector = 0, uc;
    var dbuf = this.dbuf = new Uint32Array(this.dbufSize);
    symCount = 0; /* reused from here on as "symbols left in the current group" */
    for (;;) {
      /* Determine which huffman coding group to use. */
      if (!(symCount--)) {
        symCount = GROUP_SIZE - 1;
        if (selector >= nSelectors) {
          _throw(Err.DATA_ERROR);
        }
        hufGroup = groups[selectors[selector++]];
      }
      /* Read next huffman-coded symbol: start with minLen bits and extend
         one bit at a time until the value fits under limit[] for its length. */
      i = hufGroup.minLen;
      j = reader.read(i);
      for (;; i++) {
        if (i > hufGroup.maxLen) {
          _throw(Err.DATA_ERROR);
        }
        if (j <= hufGroup.limit[i]) {
          break;
        }
        j = (j << 1) | reader.read(1);
      }
      /* Huffman decode value to get nextSym (with bounds checking) */
      j -= hufGroup.base[i];
      if (j < 0 || j >= MAX_SYMBOLS) {
        _throw(Err.DATA_ERROR);
      }
      var nextSym = hufGroup.permute[j];
      /* We have now decoded the symbol, which indicates either a new literal
         byte, or a repeated run of the most recent literal byte.  First,
         check if nextSym indicates a repeated run, and if so loop collecting
         how many times to repeat the last literal. */
      if (nextSym === SYMBOL_RUNA || nextSym === SYMBOL_RUNB) {
        /* If this is the start of a new run, zero out counter */
        if (!runPos) {
          runPos = 1;
          t = 0;
        }
        /* Neat trick that saves 1 symbol: instead of or-ing 0 or 1 at
           each bit position, add 1 or 2 instead.  For example,
           1011 is 1<<0 + 1<<1 + 2<<2.  1010 is 2<<0 + 2<<1 + 1<<2.
           You can make any bit pattern that way using 1 less symbol than
           the basic or 0/1 method (except all bits 0, which would use no
           symbols, but a run of length 0 doesn't mean anything in this
           context).  Thus space is saved. */
        if (nextSym === SYMBOL_RUNA) {
          t += runPos;
        } else {
          t += 2 * runPos;
        }
        /* A run can never be longer than the whole block.  Rejecting it here
           also stops runPos from being shifted past 32 bits, where it would
           wrap to 0 and the pending run would be silently discarded. */
        if (t > this.dbufSize) {
          _throw(Err.DATA_ERROR);
        }
        runPos <<= 1;
        continue;
      }
      /* When we hit the first non-run symbol after a run, we now know
         how many times to repeat the last literal, so append that many
         copies to our buffer of decoded symbols (dbuf) now.  (The last
         literal used is the one at the head of the mtfSymbol array.) */
      if (runPos) {
        runPos = 0;
        if (dbufCount + t > this.dbufSize) {
          _throw(Err.DATA_ERROR);
        }
        uc = symToByte[mtfSymbol[0]];
        byteCount[uc] += t;
        while (t--) {
          dbuf[dbufCount++] = uc;
        }
      }
      /* Is this the terminating symbol? */
      if (nextSym > symTotal) {
        break;
      }
      /* At this point, nextSym indicates a new literal character.  Subtract
         one to get the position in the MTF array at which this literal is
         currently to be found.  (Note that the result can't be -1 or 0,
         because 0 and 1 are RUNA and RUNB.  But another instance of the
         first symbol in the mtf array, position 0, would have been handled
         as part of a run above.  Therefore 1 unused mtf position minus
         2 non-literal nextSym values equals -1.) */
      if (dbufCount >= this.dbufSize) {
        _throw(Err.DATA_ERROR);
      }
      i = nextSym - 1;
      uc = mtf(mtfSymbol, i);
      uc = symToByte[uc];
      /* We have our literal byte.  Save it into dbuf. */
      byteCount[uc]++;
      dbuf[dbufCount++] = uc;
    }
    /* At this point, we've read all the huffman-coded symbols (and repeated
       runs) for this block from the input stream, and decoded them into the
       intermediate buffer.  There are dbufCount many decoded bytes in dbuf[].
       Now undo the Burrows-Wheeler transform on dbuf.
       See http://dogma.net/markn/articles/bwt/bwt.htm
     */
    if (origPointer < 0 || origPointer >= dbufCount) {
      _throw(Err.DATA_ERROR);
    }
    /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
    j = 0;
    for (i = 0; i < 256; i++) {
      k = j + byteCount[i];
      byteCount[i] = j;
      j = k;
    }
    /* Figure out what order dbuf would be in if we sorted it.  The low 8 bits
       of each entry keep the byte; the successor index goes in the bits above. */
    for (i = 0; i < dbufCount; i++) {
      uc = dbuf[i] & 0xff;
      dbuf[byteCount[uc]] |= (i << 8);
      byteCount[uc]++;
    }
    /* Decode first byte by hand to initialize "previous" byte.  Note that it
       doesn't get output, and the run counter starts at -1 so that it is not
       counted towards a run. */
    var pos = 0, current = 0, run = 0;
    if (dbufCount) {
      pos = dbuf[origPointer];
      current = (pos & 0xff);
      pos >>= 8;
      run = -1;
    }
    this.writePos = pos;
    this.writeCurrent = current;
    this.writeCount = dbufCount;
    this.writeRun = run;

    return true; /* more blocks to come */
  };
  /**
   * Undo the burrows-wheeler transform on the intermediate buffer prepared by
   * _get_next_block and write every decoded byte of the current block to
   * this.outputStream via writeByte(), updating this.blockCRC as it goes.
   * The whole block is consumed in one call, after which the block CRC is
   * compared with this.targetBlockCRC.
   *
   * @param {*} outputBuffer - unused; kept for signature compatibility.
   * @param {*} len - unused; kept for signature compatibility.
   * @returns {number} this.nextoutput, the count of bytes written since the
   *   last _start_bunzip; or 0 when this.writeCount is negative (no block is
   *   pending).
   * @throws {TypeError} with errorCode Err.DATA_ERROR when the block CRC does
   *   not match.
   */
  Bunzip.prototype._read_bunzip = function(outputBuffer, len) {
    var copies, previous, outbyte;
    /* james@jamestaylor.org: writeCount goes to -1 when the buffer is fully
       decoded, which results in this returning RETVAL_LAST_BLOCK, also
       equal to -1... Confusing, I'm returning 0 here to indicate no
       bytes written into the buffer */
    if (this.writeCount < 0) { return 0; }

    var dbuf = this.dbuf, pos = this.writePos, current = this.writeCurrent;
    var dbufCount = this.writeCount;
    var run = this.writeRun;

    while (dbufCount) {
      dbufCount--;
      previous = current;
      /* Follow the successor link: low 8 bits are the byte, the rest is the
         index of the next entry. */
      pos = dbuf[pos];
      current = pos & 0xff;
      pos >>= 8;
      if (run++ === 3) {
        /* The run counter reached 3: this value is a repeat count for the
           previous byte rather than a literal. */
        copies = current;
        outbyte = previous;
        current = -1; /* cannot equal any byte, so the next value starts a new run */
      } else {
        copies = 1;
        outbyte = current;
      }
      this.blockCRC.updateCRCRun(outbyte, copies);
      while (copies--) {
        this.outputStream.writeByte(outbyte);
        this.nextoutput++;
      }
      if (current !== previous) {
        run = 0;
      }
    }
    this.writeCount = dbufCount;
    // check CRC
    if (this.blockCRC.getCRC() !== this.targetBlockCRC) {
      _throw(Err.DATA_ERROR, "Bad block CRC "+
             "(got "+this.blockCRC.getCRC().toString(16)+
             " expected "+this.targetBlockCRC.toString(16)+")");
    }
    return this.nextoutput;
  };
  /**
   * Return something with a readByte() method for `input`.
   *
   * An object that already has `readByte` is returned untouched.  Anything
   * else is treated as an indexable byte container and wrapped in a new Stream
   * that tracks its own `pos` and offers readByte(), seek(pos) and eof().
   *
   * @param {Object} input - stream-like object or indexable container with `length`.
   * @returns {Object} `input` itself, or the wrapping Stream.
   */
  var coerceInputStream = function(input) {
    var alreadyStream = 'readByte' in input;
    if (alreadyStream) {
      return input;
    }
    var wrapper = new Stream();
    wrapper.pos = 0;
    wrapper.readByte = function() {
      var at = this.pos++;
      return input[at];
    };
    wrapper.seek = function(pos) {
      this.pos = pos;
    };
    wrapper.eof = function() {
      return this.pos >= input.length;
    };
    return wrapper;
  };
  /**
   * Return something with a writeByte() method for `output`.
   *
   * - falsy `output`: a new Stream backed by a 16384-byte buffer that doubles
   *   whenever it fills up;
   * - a number: a new Stream backed by a fixed buffer of that many bytes;
   * - an object that has `writeByte`: returned untouched;
   * - anything else: used directly as the fixed backing buffer.
   *
   * Streams created here carry `pos`, `buffer`, `writeByte(byte)`,
   * `getBuffer()` and `_coerced = true`.  getBuffer() trims a growable buffer
   * to the bytes written; for a fixed buffer it throws a TypeError unless
   * exactly `buffer.length` bytes were written.
   *
   * @param {number|Object} [output] - see above.
   * @returns {Object} `output` itself, or the wrapping Stream.
   */
  var coerceOutputStream = function(output) {
    /* `new Buffer(size)` is deprecated; prefer Buffer.alloc where it exists. */
    var allocate = function(size) {
      return (typeof Buffer.alloc === 'function') ? Buffer.alloc(size) : new Buffer(size);
    };
    var outputStream = new Stream();
    var resizeOk = true;
    if (output) {
      if (typeof(output) === 'number') {
        outputStream.buffer = allocate(output);
        resizeOk = false;
      } else if ('writeByte' in output) {
        return output;
      } else {
        outputStream.buffer = output;
        resizeOk = false;
      }
    } else {
      outputStream.buffer = allocate(16384);
    }
    outputStream.pos = 0;
    outputStream.writeByte = function(_byte) {
      if (resizeOk && this.pos >= this.buffer.length) {
        /* Grow by doubling, carrying over what was written so far. */
        var newBuffer = allocate(this.buffer.length * 2);
        this.buffer.copy(newBuffer);
        this.buffer = newBuffer;
      }
      this.buffer[this.pos++] = _byte;
    };
    outputStream.getBuffer = function() {
      // trim buffer
      if (this.pos !== this.buffer.length) {
        if (!resizeOk) {
          throw new TypeError('outputsize does not match decoded input');
        }
        var newBuffer = allocate(this.pos);
        this.buffer.copy(newBuffer, 0, 0, this.pos);
        this.buffer = newBuffer;
      }
      return this.buffer;
    };
    outputStream._coerced = true;
    return outputStream;
  };
  /* Static helper functions */
  Bunzip.Err = Err;

  /**
   * Decompress bzip2 data from `input` into `output`.
   *
   * Blocks are decoded one after another until the input reports eof() or the
   * end-of-stream marker is reached.  At the marker the 32-bit stream CRC is
   * read and checked.  With `multistream` truthy, decoding then restarts on
   * the following stream header if the input has more data.
   *
   * @param {Object} input - a stream (has readByte) or a buffer.
   * @param {number|Object} [output] - a stream (has writeByte), a buffer, or a
   *   number giving the buffer size.
   * @param {boolean} [multistream] - continue into concatenated streams.
   * @returns {Buffer|undefined} the result of output.getBuffer() when the
   *   output stream has that method; otherwise undefined.
   * @throws {TypeError} with errorCode Err.DATA_ERROR on a stream CRC mismatch,
   *   plus anything raised while decoding blocks.
   */
  Bunzip.decode = function(input, output, multistream) {
    // make a stream from a buffer, if necessary
    var source = coerceInputStream(input);
    var sink = coerceOutputStream(output);

    var bz = new Bunzip(source, sink);
    for (;;) {
      if ('eof' in source && source.eof()) {
        break;
      }
      if (bz._init_block()) {
        bz._read_bunzip();
        continue;
      }
      /* End-of-stream marker seen: the stream CRC follows it. */
      var targetStreamCRC = bz.reader.read(32) >>> 0; // (convert to unsigned)
      if (targetStreamCRC !== bz.streamCRC) {
        _throw(Err.DATA_ERROR, "Bad stream CRC "+
               "(got "+bz.streamCRC.toString(16)+
               " expected "+targetStreamCRC.toString(16)+")");
      }
      var anotherStream = multistream && 'eof' in source && !source.eof();
      if (!anotherStream) {
        break;
      }
      // note that start_bunzip will also resync the bit reader to next byte
      bz._start_bunzip(source, sink);
    }
    if ('getBuffer' in sink) {
      return sink.getBuffer();
    }
  };
  /**
   * Decode the single block that starts at `pos` of `input`.
   *
   * The stream header is read from the start of `input` first (by the Bunzip
   * constructor); the bit reader is then moved to `pos` with reader.seek().
   *
   * @param {Object} input - a stream (has readByte) or a buffer.
   * @param {number} pos - position handed to reader.seek().
   * @param {number|Object} [output] - a stream, a buffer, or a buffer size.
   * @returns {Buffer|undefined} the result of output.getBuffer() when the
   *   output stream has that method; otherwise undefined.
   */
  Bunzip.decodeBlock = function(input, pos, output) {
    // make a stream from a buffer, if necessary
    var source = coerceInputStream(input);
    var sink = coerceOutputStream(output);
    var bz = new Bunzip(source, sink);
    bz.reader.seek(pos);
    /* Fill the decode buffer for the block */
    if (bz._get_next_block()) {
      /* Init the CRC for writing */
      bz.blockCRC = new CRC32();
      /* Zero this so the current byte from before the seek is not written */
      bz.writeCopies = 0;
      /* Decompress the block and write it to the output stream */
      bz._read_bunzip();
      // XXX keep writing?
    }
    return ('getBuffer' in sink) ? sink.getBuffer() : undefined;
  };
  /**
   * Read a bzip2 file from the stream or buffer `input` and invoke
   * `callback(position, size)` once per bzip2 block, where `position` is the
   * block's starting position in *bits* and `size` is its uncompressed size
   * in *bytes*.  Decoded bytes are only counted, never stored.
   *
   * @param {Object} input - a stream (has readByte) or a buffer.
   * @param {function(number, number)} callback - receives (position, size).
   * @param {boolean} [multistream] - continue into concatenated streams.
   */
  Bunzip.table = function(input, callback, multistream) {
    // wrap the input so that the number of bytes consumed can be tracked
    var counting = new Stream();
    counting.delegate = coerceInputStream(input);
    counting.pos = 0;
    counting.readByte = function() {
      this.pos++;
      return this.delegate.readByte();
    };
    if (counting.delegate.eof) {
      counting.eof = counting.delegate.eof.bind(counting.delegate);
    }

    // output sink that discards the data and just counts bytes
    var sink = new Stream();
    sink.pos = 0;
    sink.writeByte = function() { this.pos++; };

    var bz = new Bunzip(counting, sink);
    var blockSize = bz.dbufSize;
    for (;;) {
      if ('eof' in counting && counting.eof()) {
        break;
      }

      /* Bit position of the next unread bit: bytes pulled so far, adjusted
         by the reader's bitOffset and by one byte when hasByte is set. */
      var position = counting.pos * 8 + bz.reader.bitOffset;
      if (bz.reader.hasByte) {
        position -= 8;
      }

      if (bz._init_block()) {
        var start = sink.pos;
        bz._read_bunzip();
        callback(position, sink.pos - start);
        continue;
      }

      bz.reader.read(32); // stream crc: consumed but ignored
      var anotherStream = multistream && 'eof' in counting && !counting.eof();
      if (!anotherStream) {
        break;
      }
      // note that start_bunzip will also resync the bit reader to next byte
      bz._start_bunzip(counting, sink);
      console.assert(bz.dbufSize === blockSize,
                     "shouldn't change block size within multistream file");
    }
  };
  /* Re-export the Stream constructor loaded from ./stream. */
  Bunzip.Stream = Stream;
  /* Version and license strings are taken from package.json. */
  Bunzip.version = pjson.version;
  Bunzip.license = pjson.license;
  /* The Bunzip constructor, with its static helpers attached, is the module's export. */
  module.exports = Bunzip;