12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485 |
- const utils = module.exports = {};
- const entities = require('entities');
- const xml2js = require('xml2js');
/**
 * Remove HTML/XML tags from a string.
 *
 * Runs two passes: first, a tag whose name begins with one of the listed
 * prefixes and that sits directly between two non-newline characters is
 * replaced by a single newline (the two surrounding characters are kept);
 * second, every remaining `<...>` sequence is deleted.
 *
 * @param {string} str - Markup to strip.
 * @returns {string} The text with tags removed.
 */
utils.stripHtml = function(str) {
  // Matched as a name prefix, so `h` also covers h1..h6 (and any other tag starting with "h").
  const lineBreakingTag = /([^\n])<\/?(h|br|p|ul|ol|li|blockquote|section|table|tr|div)(?:.|\n)*?>([^\n])/gm;
  const anyTag = /<(?:.|\n)*?>/gm;
  return str.replace(lineBreakingTag, '$1\n$3').replace(anyTag, '');
};
/**
 * Produce a plain-text snippet from markup.
 *
 * Strips tags with `utils.stripHtml`, decodes HTML entities with
 * `entities.decodeHTML`, then trims surrounding whitespace.
 *
 * @param {string} str - Markup to convert.
 * @returns {string} Trimmed, entity-decoded text.
 */
utils.getSnippet = function(str) {
  const withoutTags = utils.stripHtml(str);
  const decoded = entities.decodeHTML(withoutTags);
  return decoded.trim();
};
/**
 * Pick an href out of a list of parsed link elements.
 *
 * Each entry is expected to keep its XML attributes under `$` (e.g.
 * `{ $: { rel, href } }`). Entries without a `$` attribute bag are skipped
 * instead of raising a TypeError.
 *
 * @param {Array<Object>} [links] - Parsed link elements.
 * @param {string} rel - `rel` attribute value to look for.
 * @param {number} [fallbackIdx] - Index of the link to use when no entry has the requested `rel`.
 * @returns {string|undefined} The `href` of the first entry whose `rel` matches,
 *   otherwise the `href` of `links[fallbackIdx]`, otherwise `undefined`.
 */
utils.getLink = function(links, rel, fallbackIdx) {
  if (!links) return;
  for (let i = 0; i < links.length; ++i) {
    // An element without attributes has no `$`; guard before reading `rel`.
    const attrs = links[i] && links[i].$;
    if (attrs && attrs.rel === rel) return attrs.href;
  }
  const fallback = links[fallbackIdx];
  if (fallback && fallback.$) return fallback.$.href;
};
/**
 * Turn a parsed content node into a string.
 *
 * - If the node has a string `_` property, that string is returned.
 * - Otherwise, if the node is an object, it is serialized with
 *   `xml2js.Builder` under a `div` root element (no XML declaration, no
 *   pretty-printing).
 * - Anything else is returned unchanged.
 *
 * @param {*} content - Parsed content node (must not be null/undefined, since `content._` is read first).
 * @returns {*} The content as described above.
 */
utils.getContent = function(content) {
  if (typeof content._ === 'string') {
    return content._;
  }
  if (typeof content !== 'object') {
    return content;
  }
  const builderOptions = {
    headless: true,
    explicitRoot: true,
    rootName: 'div',
    renderOpts: {pretty: false},
  };
  return new xml2js.Builder(builderOptions).buildObject(content);
};
/**
 * Copy selected fields from a parsed XML object onto a destination object.
 *
 * Each entry of `fields` is either a key name (copied under the same key) or
 * an array `[from, to, options]`, where `options` is only read when the array
 * has more than two elements. Recognized options:
 *   - `keepArray`: copy `xml[from]` as-is instead of its first element.
 *   - `includeSnippet`: when the resulting `dest[to]` is a non-empty string,
 *     also set `dest[to + 'Snippet']` via `utils.getSnippet`.
 *
 * After copying, a `dest[to]` value that has a string `_` property is replaced
 * by that string. These post-processing steps run on whatever `dest[to]`
 * currently holds, even when `xml[from]` is undefined.
 *
 * @param {Object} xml - Source object.
 * @param {Object} dest - Object that receives the copied fields (mutated).
 * @param {Array<string|Array>} fields - Field descriptors as described above.
 * @returns {undefined}
 */
utils.copyFromXML = function(xml, dest, fields) {
  fields.forEach((field) => {
    const isTuple = Array.isArray(field);
    const from = isTuple ? field[0] : field;
    const to = isTuple ? field[1] : field;
    const options = isTuple && field.length > 2 ? field[2] : {};
    const keepArray = options.keepArray;
    const includeSnippet = options.includeSnippet;

    const source = xml[from];
    if (source !== undefined) {
      dest[to] = keepArray ? source : source[0];
    }

    const current = dest[to];
    if (current && typeof current._ === 'string') {
      dest[to] = current._;
    }

    const finalValue = dest[to];
    if (includeSnippet && finalValue && typeof finalValue === 'string') {
      dest[to + 'Snippet'] = utils.getSnippet(finalValue);
    }
  });
};
/**
 * Bridge a promise to an optional Node-style callback.
 *
 * Without a callback the promise itself is returned. With a callback, the
 * callback is scheduled through `setTimeout` (no delay argument) as
 * `callback(null, data)` on fulfilment or `callback(err)` on rejection, and
 * the returned promise resolves to the value `setTimeout` returned.
 *
 * @param {Function} [callback] - Node-style `(err, data)` callback.
 * @param {Promise} promise - Promise to observe.
 * @returns {Promise} `promise`, or the chained promise when a callback is given.
 */
utils.maybePromisify = function(callback, promise) {
  if (!callback) return promise;
  // NOTE(review): deferring via setTimeout presumably keeps an exception thrown
  // by the callback out of the promise chain - confirm intent.
  const onFulfilled = (data) => setTimeout(() => callback(null, data));
  const onRejected = (err) => setTimeout(() => callback(err));
  return promise.then(onFulfilled, onRejected);
};
// Encoding returned when none is declared or the declared one is not supported.
const DEFAULT_ENCODING = 'utf8';
// Group 1: parameter name (`encoding` or `charset`, any letter case).
// Group 2: the value, excluding optional surrounding quotes and any trailing `;`.
const ENCODING_REGEX = /(encoding|charset)\s*=\s*["']?([^\s"';]+)/i;
// Encoding names this module is willing to return.
const SUPPORTED_ENCODINGS = ['ascii', 'utf8', 'utf16le', 'ucs2', 'base64', 'latin1', 'binary', 'hex'];
// Lower-cased declared names mapped to their entry in SUPPORTED_ENCODINGS.
const ENCODING_ALIASES = {
  'utf-8': 'utf8',
  'iso-8859-1': 'latin1',
};

/**
 * Extract a supported encoding name from a Content-Type style string.
 *
 * Looks for a `charset=` or `encoding=` parameter (name matched
 * case-insensitively; value may be quoted and may be followed by `;`),
 * lower-cases the value, applies ENCODING_ALIASES, and falls back to
 * DEFAULT_ENCODING when nothing is declared or the result is not in
 * SUPPORTED_ENCODINGS.
 *
 * @param {string} [contentType] - Header value such as `text/xml; charset="ISO-8859-1"`.
 * @returns {string} One of SUPPORTED_ENCODINGS.
 */
utils.getEncodingFromContentType = function(contentType) {
  const header = contentType || '';
  const match = header.match(ENCODING_REGEX);
  let encoding = (match ? match[2] : '').toLowerCase();
  // Own-property check so names like "constructor" are not resolved through the prototype.
  if (Object.prototype.hasOwnProperty.call(ENCODING_ALIASES, encoding)) {
    encoding = ENCODING_ALIASES[encoding];
  }
  if (SUPPORTED_ENCODINGS.indexOf(encoding) === -1) {
    encoding = DEFAULT_ENCODING;
  }
  return encoding;
};
|