index.js 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. var fs = require('fs');
  2. var utf8 = require('./encoding/utf8'),
  3. unicode = require('./encoding/unicode'),
  4. mbcs = require('./encoding/mbcs'),
  5. sbcs = require('./encoding/sbcs'),
  6. iso2022 = require('./encoding/iso2022');
  // Top-level `this` of a CommonJS module is `module.exports`, so `self.detect`
  // etc. below resolve to the functions exported from this file.
  var self = this;

  // One instance of every recogniser. detect() calls `match` on each of them,
  // in the order listed here.
  var recognisers = [
    // ./encoding/utf8
    new utf8(),

    // ./encoding/unicode
    new unicode.UTF_16BE(),
    new unicode.UTF_16LE(),
    new unicode.UTF_32BE(),
    new unicode.UTF_32LE(),

    // ./encoding/mbcs
    new mbcs.sjis(),
    new mbcs.big5(),
    new mbcs.euc_jp(),
    new mbcs.euc_kr(),
    new mbcs.gb_18030(),

    // ./encoding/iso2022
    new iso2022.ISO_2022_JP(),
    new iso2022.ISO_2022_KR(),
    new iso2022.ISO_2022_CN(),

    // ./encoding/sbcs
    new sbcs.ISO_8859_1(),
    new sbcs.ISO_8859_2(),
    new sbcs.ISO_8859_5(),
    new sbcs.ISO_8859_6(),
    new sbcs.ISO_8859_7(),
    new sbcs.ISO_8859_8(),
    new sbcs.ISO_8859_9(),
    new sbcs.windows_1251(),
    new sbcs.windows_1256(),
    new sbcs.KOI8_R()
  ];
  /**
   * Run every recogniser over a chunk of bytes and report the best guess.
   *
   * @param {Buffer|Array} buffer - Indexable byte container with a `length`.
   * @param {Object} [opts]
   * @param {boolean} [opts.returnAllMatches] - When exactly `true`, return the
   *   full list of matches instead of only the best name.
   * @returns {Array|string|null} With `returnAllMatches`, every truthy match
   *   sorted by descending `confidence`; otherwise the `name` of the top match,
   *   or `null` when no recogniser produced a match.
   */
  module.exports.detect = function(buffer, opts) {
    var pos;

    // Byte occurrence histogram: fByteStats[b] counts how often byte b appears.
    var fByteStats = [];
    for (pos = 0; pos < 256; pos += 1) {
      fByteStats.push(0);
    }
    for (pos = 0; pos < buffer.length; pos += 1) {
      fByteStats[buffer[pos] & 0xff] += 1;
    }

    // Flag whether any byte in the 0x80..0x9F range is present.
    var fC1Bytes = fByteStats.slice(0x80, 0xa0).some(function(count) {
      return count !== 0;
    });

    // Shared input description handed to each recogniser's `match`.
    var context = {
      fByteStats: fByteStats,
      fC1Bytes: fC1Bytes,
      fRawInput: buffer,
      fRawLength: buffer.length,
      fInputBytes: buffer,
      fInputLen: buffer.length
    };

    // Keep only the recognisers that returned something truthy.
    var matches = [];
    recognisers.forEach(function(rec) {
      var result = rec.match(context);
      if (result) {
        matches.push(result);
      }
    });

    // Highest confidence first.
    matches.sort(function(left, right) {
      return right.confidence - left.confidence;
    });

    if (opts && opts.returnAllMatches === true) {
      return matches;
    }
    if (matches.length > 0) {
      return matches[0].name;
    }
    return null;
  };
  /**
   * Asynchronously detect the encoding of a file.
   *
   * Without `opts.sampleSize` the whole file is read. With it, at most
   * `sampleSize` bytes are read from the start of the file and only the bytes
   * actually read are analysed.
   *
   * @param {string} filepath - Path of the file to inspect.
   * @param {Object|Function} [opts] - Options forwarded to `detect`; may be
   *   omitted, in which case this argument is the callback.
   * @param {number} [opts.sampleSize] - Maximum number of bytes to sample.
   * @param {Function} cb - Called as `cb(err, null)` on failure or
   *   `cb(null, result)` with the value returned by `detect`.
   */
  module.exports.detectFile = function(filepath, opts, cb) {
    if (typeof opts === 'function') {
      cb = opts;
      opts = undefined;
    }

    if (!(opts && opts.sampleSize)) {
      fs.readFile(filepath, function(err, buffer) {
        if (err) return cb(err, null);
        cb(null, self.detect(buffer, opts));
      });
      return;
    }

    var sampleSize = opts.sampleSize;
    // Allocated before opening the file so that an invalid sampleSize still
    // throws synchronously and no descriptor is left open. `sample` is a local:
    // concurrent calls must not share one buffer.
    var sample = Buffer.allocUnsafe(sampleSize);

    // Open asynchronously so a missing/unreadable file is reported through cb,
    // exactly like the readFile path above, instead of being thrown.
    fs.open(filepath, 'r', function(openErr, fd) {
      if (openErr) return cb(openErr, null);

      fs.read(fd, sample, 0, sampleSize, null, function(readErr, bytesRead) {
        // Always release the descriptor, whether or not the read succeeded.
        fs.close(fd, function(closeErr) {
          var err = readErr || closeErr;
          if (err) return cb(err, null);

          // allocUnsafe memory is uninitialised: when the file is shorter than
          // sampleSize, only the first bytesRead bytes hold file content.
          cb(null, self.detect(sample.subarray(0, bytesRead), opts));
        });
      });
    });
  };
  /**
   * Synchronously detect the encoding of a file.
   *
   * Without `opts.sampleSize` the whole file is read. With it, at most
   * `sampleSize` bytes are read and only the bytes actually read are analysed.
   *
   * @param {string} filepath - Path of the file to inspect.
   * @param {Object} [opts] - Options forwarded to `detect`.
   * @param {number} [opts.sampleSize] - Maximum number of bytes to sample.
   * @returns {*} Whatever `detect` returns for the bytes that were read.
   * @throws Any error raised by the underlying `fs` calls.
   */
  module.exports.detectFileSync = function(filepath, opts) {
    if (opts && opts.sampleSize) {
      // Allocate first so a bad sampleSize cannot leave a descriptor open.
      var sample = Buffer.allocUnsafe(opts.sampleSize);
      var fd = fs.openSync(filepath, 'r');
      var bytesRead;
      try {
        bytesRead = fs.readSync(fd, sample, 0, opts.sampleSize);
      } finally {
        // Release the descriptor even when the read throws.
        fs.closeSync(fd);
      }
      // allocUnsafe memory is uninitialised: when the file is shorter than
      // sampleSize, only the first bytesRead bytes hold file content.
      return self.detect(sample.subarray(0, bytesRead), opts);
    }
    return self.detect(fs.readFileSync(filepath), opts);
  };
  102. // Wrappers for the previous functions to return all encodings
  /**
   * Like `detect`, but always returns the full list of matches.
   *
   * @param {Buffer|Array} buffer - Bytes to analyse.
   * @param {Object} [opts] - Options forwarded to `detect`; anything that is
   *   not a non-null object is treated as "no options".
   * @returns {*} The value `detect` returns with `returnAllMatches` set.
   */
  module.exports.detectAll = function(buffer, opts) {
    // `typeof null` is 'object', so null has to be excluded explicitly.
    var base = (opts !== null && typeof opts === 'object') ? opts : {};
    // Work on a copy so the caller's options object is left untouched.
    var allOpts = Object.assign({}, base, { returnAllMatches: true });
    return self.detect(buffer, allOpts);
  };
  /**
   * Like `detectFile`, but the callback receives the full list of matches.
   *
   * @param {string} filepath - Path of the file to inspect.
   * @param {Object|Function} [opts] - Options forwarded to `detectFile`; may be
   *   omitted, in which case this argument is the callback. Anything that is
   *   not a non-null object is treated as "no options".
   * @param {Function} cb - Callback passed through to `detectFile`.
   */
  module.exports.detectFileAll = function(filepath, opts, cb) {
    if (typeof opts === 'function') {
      cb = opts;
      opts = undefined;
    }
    // `typeof null` is 'object', so null has to be excluded explicitly.
    var base = (opts !== null && typeof opts === 'object') ? opts : {};
    // Work on a copy so the caller's options object is left untouched.
    var allOpts = Object.assign({}, base, { returnAllMatches: true });
    self.detectFile(filepath, allOpts, cb);
  };
  /**
   * Like `detectFileSync`, but always returns the full list of matches.
   *
   * @param {string} filepath - Path of the file to inspect.
   * @param {Object} [opts] - Options forwarded to `detectFileSync`; anything
   *   that is not a non-null object is treated as "no options".
   * @returns {*} The value `detectFileSync` returns with `returnAllMatches` set.
   */
  module.exports.detectFileAllSync = function(filepath, opts) {
    // `typeof null` is 'object', so null has to be excluded explicitly.
    var base = (opts !== null && typeof opts === 'object') ? opts : {};
    // Work on a copy so the caller's options object is left untouched.
    var allOpts = Object.assign({}, base, { returnAllMatches: true });
    return self.detectFileSync(filepath, allOpts);
  };