123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151 |
- var fs = require('fs');
- var utf8 = require('./encoding/utf8'),
- unicode = require('./encoding/unicode'),
- mbcs = require('./encoding/mbcs'),
- sbcs = require('./encoding/sbcs'),
- iso2022 = require('./encoding/iso2022');
// Handle on the object this module was evaluated with (`this` at module
// scope); the exported functions use it to call one another.
var self = this;

// One instance of every recogniser the detector knows about. The order here
// is the order in which they are consulted.
var recognisers = [
  // From ./encoding/utf8
  new utf8(),

  // From ./encoding/unicode
  new unicode.UTF_16BE(),
  new unicode.UTF_16LE(),
  new unicode.UTF_32BE(),
  new unicode.UTF_32LE(),

  // From ./encoding/mbcs
  new mbcs.sjis(),
  new mbcs.big5(),
  new mbcs.euc_jp(),
  new mbcs.euc_kr(),
  new mbcs.gb_18030(),

  // From ./encoding/iso2022
  new iso2022.ISO_2022_JP(),
  new iso2022.ISO_2022_KR(),
  new iso2022.ISO_2022_CN(),

  // From ./encoding/sbcs
  new sbcs.ISO_8859_1(),
  new sbcs.ISO_8859_2(),
  new sbcs.ISO_8859_5(),
  new sbcs.ISO_8859_6(),
  new sbcs.ISO_8859_7(),
  new sbcs.ISO_8859_8(),
  new sbcs.ISO_8859_9(),
  new sbcs.windows_1251(),
  new sbcs.windows_1256(),
  new sbcs.KOI8_R()
];
/**
 * Runs every registered recogniser over `buffer` and reports the result.
 *
 * @param {Buffer} buffer - Bytes to analyse; read by index and `length`.
 * @param {Object} [opts] - Optional settings.
 * @param {boolean} [opts.returnAllMatches] - When strictly `true`, return the
 *   full list of matches instead of only the best match's name.
 * @returns {(string|null|Array)} By default the `name` of the match with the
 *   highest `confidence`, or `null` when no recogniser matched. With
 *   `returnAllMatches`, the array of all matches sorted by descending
 *   `confidence`.
 */
module.exports.detect = function(buffer, opts) {
  // Build a 256-slot histogram of byte values found in the input.
  var byteCounts = [];
  while (byteCounts.length < 256) {
    byteCounts.push(0);
  }
  for (var pos = 0; pos < buffer.length; pos++) {
    byteCounts[buffer[pos] & 0x00ff]++;
  }

  // Note whether any byte in the 0x80-0x9F range occurs at all.
  var hasC1Bytes = byteCounts.slice(0x80, 0xA0).some(function(count) {
    return count !== 0;
  });

  // Shared input description handed to each recogniser's match().
  var context = {
    fByteStats: byteCounts,
    fC1Bytes: hasC1Bytes,
    fRawInput: buffer,
    fRawLength: buffer.length,
    fInputBytes: buffer,
    fInputLen: buffer.length
  };

  // Ask every recogniser, keep the truthy results, best confidence first.
  var matches = [];
  recognisers.forEach(function(recogniser) {
    var result = recogniser.match(context);
    if (result) {
      matches.push(result);
    }
  });
  matches.sort(function(left, right) {
    return right.confidence - left.confidence;
  });

  if (opts && opts.returnAllMatches === true) {
    return matches;
  }
  if (matches.length === 0) {
    return null;
  }
  return matches[0].name;
};
/**
 * Asynchronously detects the encoding of a file.
 *
 * Without `opts.sampleSize` the whole file is read with `fs.readFile`. With a
 * truthy `opts.sampleSize`, at most that many bytes are read from the file and
 * only the bytes actually read are analysed.
 *
 * @param {string} filepath - Path of the file to inspect.
 * @param {Object|Function} [opts] - Options forwarded to `detect`, or the
 *   callback when options are omitted.
 * @param {number} [opts.sampleSize] - Maximum number of bytes to sample.
 * @param {Function} cb - Called as `cb(err, null)` on a read error, otherwise
 *   as `cb(null, result)` where `result` is whatever `detect` returns.
 * @throws When `opts.sampleSize` is set and the file cannot be opened: the
 *   open is performed synchronously, so that error is thrown rather than
 *   passed to `cb`.
 */
module.exports.detectFile = function(filepath, opts, cb) {
  if (typeof opts === 'function') {
    cb = opts;
    opts = undefined;
  }

  var fd;

  var handler = function(err, buffer) {
    // Only the sampling path opens a descriptor; compare by type so that a
    // valid descriptor 0 is closed as well.
    if (typeof fd === 'number') {
      fs.closeSync(fd);
    }

    if (err) return cb(err, null);
    cb(null, self.detect(buffer, opts));
  };

  if (opts && opts.sampleSize) {
    fd = fs.openSync(filepath, 'r');
    // Declared locally: the original comma expression assigned `sample` as an
    // implicit global.
    var sample = Buffer.allocUnsafe(opts.sampleSize);

    fs.read(fd, sample, 0, opts.sampleSize, null, function(err, bytesRead) {
      // The buffer comes from allocUnsafe, so anything past bytesRead is
      // uninitialized memory and must not be analysed.
      handler(err, err ? sample : sample.subarray(0, bytesRead));
    });
    return;
  }

  fs.readFile(filepath, handler);
};
/**
 * Synchronously detects the encoding of a file.
 *
 * Without `opts.sampleSize` the whole file is read with `fs.readFileSync`.
 * With a truthy `opts.sampleSize`, at most that many bytes are read and only
 * the bytes actually read are analysed.
 *
 * @param {string} filepath - Path of the file to inspect.
 * @param {Object} [opts] - Options forwarded to `detect`.
 * @param {number} [opts.sampleSize] - Maximum number of bytes to sample.
 * @returns {(string|null|Array)} Whatever `detect` returns for the bytes read.
 * @throws Any error raised by the underlying `fs` calls.
 */
module.exports.detectFileSync = function(filepath, opts) {
  if (opts && opts.sampleSize) {
    var fd = fs.openSync(filepath, 'r');
    var sample = Buffer.allocUnsafe(opts.sampleSize);
    var bytesRead;

    try {
      bytesRead = fs.readSync(fd, sample, 0, opts.sampleSize);
    } finally {
      // Release the descriptor even when the read throws.
      fs.closeSync(fd);
    }

    // The buffer comes from allocUnsafe, so anything past bytesRead is
    // uninitialized memory and must not be analysed.
    return self.detect(sample.subarray(0, bytesRead), opts);
  }

  return self.detect(fs.readFileSync(filepath), opts);
};
- // Wrappers for the previous functions to return all encodings
/**
 * Like `detect`, but always returns every match instead of the best name.
 *
 * @param {Buffer} buffer - Bytes to analyse.
 * @param {Object} [opts] - Options forwarded to `detect`. The object is
 *   copied, never modified; non-object or `null` values are treated as `{}`.
 * @returns {Array} Whatever `detect` returns when `returnAllMatches` is true.
 */
module.exports.detectAll = function(buffer, opts) {
  // `typeof null` is 'object', so null has to be excluded explicitly.
  var base = (opts !== null && typeof opts === 'object') ? opts : {};
  // Work on a copy so the caller's options object is left untouched.
  var allOpts = Object.assign({}, base, { returnAllMatches: true });
  return self.detect(buffer, allOpts);
};
/**
 * Like `detectFile`, but the callback always receives every match instead of
 * the best name.
 *
 * @param {string} filepath - Path of the file to inspect.
 * @param {Object|Function} [opts] - Options forwarded to `detectFile`, or the
 *   callback when options are omitted. The object is copied, never modified;
 *   non-object or `null` values are treated as `{}`.
 * @param {Function} cb - Callback forwarded to `detectFile`.
 */
module.exports.detectFileAll = function(filepath, opts, cb) {
  if (typeof opts === 'function') {
    cb = opts;
    opts = undefined;
  }
  // `typeof null` is 'object', so null has to be excluded explicitly.
  var base = (opts !== null && typeof opts === 'object') ? opts : {};
  // Work on a copy so the caller's options object is left untouched.
  var allOpts = Object.assign({}, base, { returnAllMatches: true });
  self.detectFile(filepath, allOpts, cb);
};
/**
 * Like `detectFileSync`, but always returns every match instead of the best
 * name.
 *
 * @param {string} filepath - Path of the file to inspect.
 * @param {Object} [opts] - Options forwarded to `detectFileSync`. The object
 *   is copied, never modified; non-object or `null` values are treated as `{}`.
 * @returns {Array} Whatever `detectFileSync` returns when `returnAllMatches`
 *   is true.
 */
module.exports.detectFileAllSync = function(filepath, opts) {
  // `typeof null` is 'object', so null has to be excluded explicitly.
  var base = (opts !== null && typeof opts === 'object') ? opts : {};
  // Work on a copy so the caller's options object is left untouched.
  var allOpts = Object.assign({}, base, { returnAllMatches: true });
  return self.detectFileSync(filepath, allOpts);
};
|