default.js 10 KB


  1. /**
  2. * default settings
  3. *
  4. * @author Zongmin Lei<leizongmin@gmail.com>
  5. */
  6. var FilterCSS = require("cssfilter").FilterCSS;
  7. var getDefaultCSSWhiteList = require("cssfilter").getDefaultWhiteList;
  8. var _ = require("./util");
  9. function getDefaultWhiteList() {
  10. return {
  11. a: ["target", "href", "title"],
  12. abbr: ["title"],
  13. address: [],
  14. area: ["shape", "coords", "href", "alt"],
  15. article: [],
  16. aside: [],
  17. audio: [
  18. "autoplay",
  19. "controls",
  20. "crossorigin",
  21. "loop",
  22. "muted",
  23. "preload",
  24. "src",
  25. ],
  26. b: [],
  27. bdi: ["dir"],
  28. bdo: ["dir"],
  29. big: [],
  30. blockquote: ["cite"],
  31. br: [],
  32. caption: [],
  33. center: [],
  34. cite: [],
  35. code: [],
  36. col: ["align", "valign", "span", "width"],
  37. colgroup: ["align", "valign", "span", "width"],
  38. dd: [],
  39. del: ["datetime"],
  40. details: ["open"],
  41. div: [],
  42. dl: [],
  43. dt: [],
  44. em: [],
  45. figcaption: [],
  46. figure: [],
  47. font: ["color", "size", "face"],
  48. footer: [],
  49. h1: [],
  50. h2: [],
  51. h3: [],
  52. h4: [],
  53. h5: [],
  54. h6: [],
  55. header: [],
  56. hr: [],
  57. i: [],
  58. img: ["src", "alt", "title", "width", "height", "loading"],
  59. ins: ["datetime"],
  60. kbd: [],
  61. li: [],
  62. mark: [],
  63. nav: [],
  64. ol: [],
  65. p: [],
  66. pre: [],
  67. s: [],
  68. section: [],
  69. small: [],
  70. span: [],
  71. sub: [],
  72. summary: [],
  73. sup: [],
  74. strong: [],
  75. strike: [],
  76. table: ["width", "border", "align", "valign"],
  77. tbody: ["align", "valign"],
  78. td: ["width", "rowspan", "colspan", "align", "valign"],
  79. tfoot: ["align", "valign"],
  80. th: ["width", "rowspan", "colspan", "align", "valign"],
  81. thead: ["align", "valign"],
  82. tr: ["rowspan", "align", "valign"],
  83. tt: [],
  84. u: [],
  85. ul: [],
  86. video: [
  87. "autoplay",
  88. "controls",
  89. "crossorigin",
  90. "loop",
  91. "muted",
  92. "playsinline",
  93. "poster",
  94. "preload",
  95. "src",
  96. "height",
  97. "width",
  98. ],
  99. };
  100. }
// Shared FilterCSS instance constructed with no arguments. safeAttrValue falls
// back to it when the caller supplies no filter of its own, and it is also
// exported as `cssFilter`.
var defaultCSSFilter = new FilterCSS();
  102. /**
  103. * default onTag function
  104. *
  105. * @param {String} tag
  106. * @param {String} html
  107. * @param {Object} options
  108. * @return {String}
  109. */
  110. function onTag(tag, html, options) {
  111. // do nothing
  112. }
  113. /**
  114. * default onIgnoreTag function
  115. *
  116. * @param {String} tag
  117. * @param {String} html
  118. * @param {Object} options
  119. * @return {String}
  120. */
  121. function onIgnoreTag(tag, html, options) {
  122. // do nothing
  123. }
  124. /**
  125. * default onTagAttr function
  126. *
  127. * @param {String} tag
  128. * @param {String} name
  129. * @param {String} value
  130. * @return {String}
  131. */
  132. function onTagAttr(tag, name, value) {
  133. // do nothing
  134. }
  135. /**
  136. * default onIgnoreTagAttr function
  137. *
  138. * @param {String} tag
  139. * @param {String} name
  140. * @param {String} value
  141. * @return {String}
  142. */
  143. function onIgnoreTagAttr(tag, name, value) {
  144. // do nothing
  145. }
  146. /**
  147. * default escapeHtml function
  148. *
  149. * @param {String} html
  150. */
  151. function escapeHtml(html) {
  152. return html.replace(REGEXP_LT, "&lt;").replace(REGEXP_GT, "&gt;");
  153. }
  154. /**
  155. * default safeAttrValue function
  156. *
  157. * @param {String} tag
  158. * @param {String} name
  159. * @param {String} value
  160. * @param {Object} cssFilter
  161. * @return {String}
  162. */
  163. function safeAttrValue(tag, name, value, cssFilter) {
  164. // unescape attribute value firstly
  165. value = friendlyAttrValue(value);
  166. if (name === "href" || name === "src") {
  167. // filter `href` and `src` attribute
  168. // only allow the value that starts with `http://` | `https://` | `mailto:` | `/` | `#`
  169. value = _.trim(value);
  170. if (value === "#") return "#";
  171. if (
  172. !(
  173. value.substr(0, 7) === "http://" ||
  174. value.substr(0, 8) === "https://" ||
  175. value.substr(0, 7) === "mailto:" ||
  176. value.substr(0, 4) === "tel:" ||
  177. value.substr(0, 11) === "data:image/" ||
  178. value.substr(0, 6) === "ftp://" ||
  179. value.substr(0, 2) === "./" ||
  180. value.substr(0, 3) === "../" ||
  181. value[0] === "#" ||
  182. value[0] === "/"
  183. )
  184. ) {
  185. return "";
  186. }
  187. } else if (name === "background") {
  188. // filter `background` attribute (maybe no use)
  189. // `javascript:`
  190. REGEXP_DEFAULT_ON_TAG_ATTR_4.lastIndex = 0;
  191. if (REGEXP_DEFAULT_ON_TAG_ATTR_4.test(value)) {
  192. return "";
  193. }
  194. } else if (name === "style") {
  195. // `expression()`
  196. REGEXP_DEFAULT_ON_TAG_ATTR_7.lastIndex = 0;
  197. if (REGEXP_DEFAULT_ON_TAG_ATTR_7.test(value)) {
  198. return "";
  199. }
  200. // `url()`
  201. REGEXP_DEFAULT_ON_TAG_ATTR_8.lastIndex = 0;
  202. if (REGEXP_DEFAULT_ON_TAG_ATTR_8.test(value)) {
  203. REGEXP_DEFAULT_ON_TAG_ATTR_4.lastIndex = 0;
  204. if (REGEXP_DEFAULT_ON_TAG_ATTR_4.test(value)) {
  205. return "";
  206. }
  207. }
  208. if (cssFilter !== false) {
  209. cssFilter = cssFilter || defaultCSSFilter;
  210. value = cssFilter.process(value);
  211. }
  212. }
  213. // escape `<>"` before returns
  214. value = escapeAttrValue(value);
  215. return value;
  216. }
// RegExp list
// Every pattern below carries the `g` flag: callers that use `.test()` must
// reset `lastIndex` first (safeAttrValue does), `.replace()` needs no reset.

// literal `<` and `>`
var REGEXP_LT = /</g;
var REGEXP_GT = />/g;
// literal double quote and its `&quot;` entity form
var REGEXP_QUOTE = /"/g;
var REGEXP_QUOTE_2 = /&quot;/g;
// numeric character reference `&#...;` (the trailing `;` is optional); the
// captured group may also contain letters, which covers the `x` hex prefix
var REGEXP_ATTR_VALUE_1 = /&#([a-zA-Z0-9]*);?/gim;
// HTML5 named entities `&colon;` and `&newline;` (trailing `;` optional)
var REGEXP_ATTR_VALUE_COLON = /&colon;?/gim;
var REGEXP_ATTR_VALUE_NEWLINE = /&newline;?/gim;
// var REGEXP_DEFAULT_ON_TAG_ATTR_3 = /\/\*|\*\//gm;
// `javascript:`, `vbscript:`, `livescript:` and `mocha:`, case-insensitive and
// with optional whitespace between the letters
var REGEXP_DEFAULT_ON_TAG_ATTR_4 =
  /((j\s*a\s*v\s*a|v\s*b|l\s*i\s*v\s*e)\s*s\s*c\s*r\s*i\s*p\s*t\s*|m\s*o\s*c\s*h\s*a):/gi;
// var REGEXP_DEFAULT_ON_TAG_ATTR_5 = /^[\s"'`]*(d\s*a\s*t\s*a\s*)\:/gi;
// var REGEXP_DEFAULT_ON_TAG_ATTR_6 = /^[\s"'`]*(d\s*a\s*t\s*a\s*)\:\s*image\//gi;
// `expression(` with optional whitespace between the letters
var REGEXP_DEFAULT_ON_TAG_ATTR_7 =
  /e\s*x\s*p\s*r\s*e\s*s\s*s\s*i\s*o\s*n\s*\(.*/gi;
// `url(` with optional whitespace between the letters
var REGEXP_DEFAULT_ON_TAG_ATTR_8 = /u\s*r\s*l\s*\(.*/gi;
  233. /**
  234. * escape double quote
  235. *
  236. * @param {String} str
  237. * @return {String} str
  238. */
  239. function escapeQuote(str) {
  240. return str.replace(REGEXP_QUOTE, "&quot;");
  241. }
  242. /**
  243. * unescape double quote
  244. *
  245. * @param {String} str
  246. * @return {String} str
  247. */
  248. function unescapeQuote(str) {
  249. return str.replace(REGEXP_QUOTE_2, '"');
  250. }
  251. /**
  252. * escape html entities
  253. *
  254. * @param {String} str
  255. * @return {String}
  256. */
  257. function escapeHtmlEntities(str) {
  258. return str.replace(REGEXP_ATTR_VALUE_1, function replaceUnicode(str, code) {
  259. return code[0] === "x" || code[0] === "X"
  260. ? String.fromCharCode(parseInt(code.substr(1), 16))
  261. : String.fromCharCode(parseInt(code, 10));
  262. });
  263. }
  264. /**
  265. * escape html5 new danger entities
  266. *
  267. * @param {String} str
  268. * @return {String}
  269. */
  270. function escapeDangerHtml5Entities(str) {
  271. return str
  272. .replace(REGEXP_ATTR_VALUE_COLON, ":")
  273. .replace(REGEXP_ATTR_VALUE_NEWLINE, " ");
  274. }
  275. /**
  276. * clear nonprintable characters
  277. *
  278. * @param {String} str
  279. * @return {String}
  280. */
  281. function clearNonPrintableCharacter(str) {
  282. var str2 = "";
  283. for (var i = 0, len = str.length; i < len; i++) {
  284. str2 += str.charCodeAt(i) < 32 ? " " : str.charAt(i);
  285. }
  286. return _.trim(str2);
  287. }
  288. /**
  289. * get friendly attribute value
  290. *
  291. * @param {String} str
  292. * @return {String}
  293. */
  294. function friendlyAttrValue(str) {
  295. str = unescapeQuote(str);
  296. str = escapeHtmlEntities(str);
  297. str = escapeDangerHtml5Entities(str);
  298. str = clearNonPrintableCharacter(str);
  299. return str;
  300. }
  301. /**
  302. * unescape attribute value
  303. *
  304. * @param {String} str
  305. * @return {String}
  306. */
  307. function escapeAttrValue(str) {
  308. str = escapeQuote(str);
  309. str = escapeHtml(str);
  310. return str;
  311. }
  312. /**
  313. * `onIgnoreTag` function for removing all the tags that are not in whitelist
  314. */
  315. function onIgnoreTagStripAll() {
  316. return "";
  317. }
  318. /**
  319. * remove tag body
  320. * specify a `tags` list, if the tag is not in the `tags` list then process by the specify function (optional)
  321. *
  322. * @param {array} tags
  323. * @param {function} next
  324. */
  325. function StripTagBody(tags, next) {
  326. if (typeof next !== "function") {
  327. next = function () {};
  328. }
  329. var isRemoveAllTag = !Array.isArray(tags);
  330. function isRemoveTag(tag) {
  331. if (isRemoveAllTag) return true;
  332. return _.indexOf(tags, tag) !== -1;
  333. }
  334. var removeList = [];
  335. var posStart = false;
  336. return {
  337. onIgnoreTag: function (tag, html, options) {
  338. if (isRemoveTag(tag)) {
  339. if (options.isClosing) {
  340. var ret = "[/removed]";
  341. var end = options.position + ret.length;
  342. removeList.push([
  343. posStart !== false ? posStart : options.position,
  344. end,
  345. ]);
  346. posStart = false;
  347. return ret;
  348. } else {
  349. if (!posStart) {
  350. posStart = options.position;
  351. }
  352. return "[removed]";
  353. }
  354. } else {
  355. return next(tag, html, options);
  356. }
  357. },
  358. remove: function (html) {
  359. var rethtml = "";
  360. var lastPos = 0;
  361. _.forEach(removeList, function (pos) {
  362. rethtml += html.slice(lastPos, pos[0]);
  363. lastPos = pos[1];
  364. });
  365. rethtml += html.slice(lastPos);
  366. return rethtml;
  367. },
  368. };
  369. }
  370. /**
  371. * remove html comments
  372. *
  373. * @param {String} html
  374. * @return {String}
  375. */
  376. function stripCommentTag(html) {
  377. var retHtml = "";
  378. var lastPos = 0;
  379. while (lastPos < html.length) {
  380. var i = html.indexOf("<!--", lastPos);
  381. if (i === -1) {
  382. retHtml += html.slice(lastPos);
  383. break;
  384. }
  385. retHtml += html.slice(lastPos, i);
  386. var j = html.indexOf("-->", i);
  387. if (j === -1) {
  388. break;
  389. }
  390. lastPos = j + 3;
  391. }
  392. return retHtml;
  393. }
  394. /**
  395. * remove invisible characters
  396. *
  397. * @param {String} html
  398. * @return {String}
  399. */
  400. function stripBlankChar(html) {
  401. var chars = html.split("");
  402. chars = chars.filter(function (char) {
  403. var c = char.charCodeAt(0);
  404. if (c === 127) return false;
  405. if (c <= 31) {
  406. if (c === 10 || c === 13) return true;
  407. return false;
  408. }
  409. return true;
  410. });
  411. return chars.join("");
  412. }
// Public API of this module.
// `whiteList` is one whitelist object built once when the module is loaded;
// `getDefaultWhiteList` builds a fresh one on every call.
exports.whiteList = getDefaultWhiteList();
exports.getDefaultWhiteList = getDefaultWhiteList;
// default (no-op) hooks
exports.onTag = onTag;
exports.onIgnoreTag = onIgnoreTag;
exports.onTagAttr = onTagAttr;
exports.onIgnoreTagAttr = onIgnoreTagAttr;
// escaping / decoding helpers
exports.safeAttrValue = safeAttrValue;
exports.escapeHtml = escapeHtml;
exports.escapeQuote = escapeQuote;
exports.unescapeQuote = unescapeQuote;
exports.escapeHtmlEntities = escapeHtmlEntities;
exports.escapeDangerHtml5Entities = escapeDangerHtml5Entities;
exports.clearNonPrintableCharacter = clearNonPrintableCharacter;
exports.friendlyAttrValue = friendlyAttrValue;
exports.escapeAttrValue = escapeAttrValue;
// tag / comment / control-character stripping helpers
exports.onIgnoreTagStripAll = onIgnoreTagStripAll;
exports.StripTagBody = StripTagBody;
exports.stripCommentTag = stripCommentTag;
exports.stripBlankChar = stripBlankChar;
// character used to wrap attribute values
exports.attributeWrapSign = '"';
// CSS filtering defaults
exports.cssFilter = defaultCSSFilter;
exports.getDefaultCSSWhiteList = getDefaultCSSWhiteList;