encode.js 2.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. "use strict";
  // Interop helper emitted by the compiler: reuses this.__importDefault when one
  // already exists; otherwise returns mod unchanged if it is flagged __esModule,
  // or wraps it as { default: mod } so that .default can always be read.
  2. var __importDefault = (this && this.__importDefault) || function (mod) {
  3. return (mod && mod.__esModule) ? mod : { "default": mod };
  4. };
  // Flag this module's own exports object as an ES module.
  5. Object.defineProperty(exports, "__esModule", { value: true });
  // Pre-declare the two exported names as undefined; each one is assigned its
  // real function right after that function's definition further down.
  6. exports.encodeNonAsciiHTML = exports.encodeHTML = void 0;
  // Default export is used below through .get(charCode), i.e. as a lookup keyed
  // by UTF-16 code unit that yields either an entity string or a branch node.
  7. var encode_html_js_1 = __importDefault(require("./generated/encode-html.js"));
  // Supplies xmlReplacer and getCodePoint, both used further down.
  8. var escape_js_1 = require("./escape.js");
  // Character class escaped by encodeHTML: tab, newline, form feed, the ASCII
  // punctuation ranges ! to , (which already includes $), . and /, : to @,
  // [ to grave accent, { to }, and every code unit from U+0080 to U+FFFF.
  // Space, hyphen-minus, tilde, digits and letters are not matched.
  // The g flag makes this regex stateful: exec() reads and advances lastIndex.
  9. var htmlReplacer = /[\t\n!-,./:-@[-`\f{-}$\x80-\uFFFF]/g;
  /**
   * Escapes `data` for HTML as aggressively as possible: every character
   * matched by `htmlReplacer` is replaced by an entity, including ASCII
   * punctuation that would be legal unescaped in an HTML document (for
   * example `#`).
   *
   * For more compact output use `encodeNonAsciiHTML`, which only touches
   * characters that are not valid in HTML documents plus non-ASCII characters.
   *
   * Characters without a named entity are written as a hexadecimal numeric
   * character reference (e.g. `&#xfc;` for U+00FC).
   *
   * @param {string} data Text to encode.
   * @returns {string} The encoded text.
   */
  function encodeHTML(data) {
      var encoded = encodeHTMLTrieRe(htmlReplacer, data);
      return encoded;
  }
  exports.encodeHTML = encodeHTML;
  /**
   * Escapes only what has to be escaped: non-ASCII characters and characters
   * that are not valid in HTML documents, i.e. whatever `xmlReplacer` from
   * `./escape.js` matches. Characters that are valid in HTML documents, such
   * as `#`, are left as they are.
   *
   * Characters without a named entity are written as a hexadecimal numeric
   * character reference (e.g. `&#xfc;` for U+00FC).
   *
   * @param {string} data Text to encode.
   * @returns {string} The encoded text.
   */
  function encodeNonAsciiHTML(data) {
      var replacer = escape_js_1.xmlReplacer;
      return encodeHTMLTrieRe(replacer, data);
  }
  exports.encodeNonAsciiHTML = encodeNonAsciiHTML;
  /**
   * Replaces every character of `str` matched by `regExp` with an HTML entity.
   *
   * For each match the UTF-16 code unit is looked up in the generated entity
   * table (`encode_html_js_1.default`):
   *  - a string result is the entity itself;
   *  - an object result is a branch node: `n`/`o` (single follow-up code unit
   *    and its entity) or `n` as a nested lookup describe entities that cover
   *    this character plus the next one, and `v` is the entity for the
   *    character on its own (may be undefined);
   *  - if no named entity is found, a hexadecimal numeric reference built from
   *    the full code point is emitted.
   *
   * @param {RegExp} regExp Matcher for characters to escape. It has to carry the
   *   `g` flag, because the loop relies on `exec` advancing `lastIndex`
   *   (`htmlReplacer` does; `xmlReplacer` is assumed to - confirm in escape.js).
   * @param {string} str Text to encode.
   * @returns {string} `str` with all matched characters replaced by entities.
   */
  function encodeHTMLTrieRe(regExp, str) {
      var ret = "";
      var lastIdx = 0;
      var match;
      // The replacers are shared, stateful /g regexes. If an earlier call threw
      // part-way through (e.g. on a non-string argument) lastIndex is left
      // pointing into the old input, and scanning from there would leave the
      // start of this string unescaped. Always start from the beginning.
      regExp.lastIndex = 0;
      while ((match = regExp.exec(str)) !== null) {
          var i = match.index;
          // Copy the untouched text between the previous match and this one.
          ret += str.substring(lastIdx, i);
          var char = str.charCodeAt(i);
          var next = encode_html_js_1.default.get(char);
          if (typeof next === "object") {
              // We are in a branch. Try to match the next char.
              if (i + 1 < str.length) {
                  var nextChar = str.charCodeAt(i + 1);
                  var value = typeof next.n === "number"
                      ? next.n === nextChar
                          ? next.o
                          : undefined
                      : next.n.get(nextChar);
                  if (value !== undefined) {
                      ret += value;
                      // The entity covered two code units; skip the second one.
                      lastIdx = regExp.lastIndex += 1;
                      continue;
                  }
              }
              // No two-character entity applies; use the node's own value.
              next = next.v;
          }
          // We might have a tree node without a value; skip and use a numeric entity.
          if (next !== undefined) {
              ret += next;
              lastIdx = i + 1;
          }
          else {
              var cp = (0, escape_js_1.getCodePoint)(str, i);
              ret += "&#x".concat(cp.toString(16), ";");
              // Increase by 1 if we have a surrogate pair
              lastIdx = regExp.lastIndex += Number(cp !== char);
          }
      }
      // Append the tail after the last match (substring, as used above, instead
      // of the deprecated substr; lastIdx is never negative here).
      return ret + str.substring(lastIdx);
  }
  77. //# sourceMappingURL=encode.js.map