escape.js 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. export const xmlReplacer = /["&'<>$\x80-\uFFFF]/g;
  2. const xmlCodeMap = new Map([
  3. [34, "&quot;"],
  4. [38, "&amp;"],
  5. [39, "&apos;"],
  6. [60, "&lt;"],
  7. [62, "&gt;"],
  8. ]);
  9. // For compatibility with node < 4, we wrap `codePointAt`
  10. export const getCodePoint =
  11. // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
  12. String.prototype.codePointAt != null
  13. ? (str, index) => str.codePointAt(index)
  14. : // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
  15. (c, index) => (c.charCodeAt(index) & 0xfc00) === 0xd800
  16. ? (c.charCodeAt(index) - 0xd800) * 0x400 +
  17. c.charCodeAt(index + 1) -
  18. 0xdc00 +
  19. 0x10000
  20. : c.charCodeAt(index);
  21. /**
  22. * Encodes all non-ASCII characters, as well as characters not valid in XML
  23. * documents using XML entities.
  24. *
  25. * If a character has no equivalent entity, a
  26. * numeric hexadecimal reference (eg. `&#xfc;`) will be used.
  27. */
  28. export function encodeXML(str) {
  29. let ret = "";
  30. let lastIdx = 0;
  31. let match;
  32. while ((match = xmlReplacer.exec(str)) !== null) {
  33. const i = match.index;
  34. const char = str.charCodeAt(i);
  35. const next = xmlCodeMap.get(char);
  36. if (next !== undefined) {
  37. ret += str.substring(lastIdx, i) + next;
  38. lastIdx = i + 1;
  39. }
  40. else {
  41. ret += `${str.substring(lastIdx, i)}&#x${getCodePoint(str, i).toString(16)};`;
  42. // Increase by 1 if we have a surrogate pair
  43. lastIdx = xmlReplacer.lastIndex += Number((char & 0xfc00) === 0xd800);
  44. }
  45. }
  46. return ret + str.substr(lastIdx);
  47. }
/**
 * Alias of `encodeXML`.
 *
 * Encodes all non-ASCII characters, as well as characters not valid in XML
 * documents. Characters with a predefined XML entity (`"`, `&`, `'`, `<`,
 * `>`) become that entity; everything else that is encoded becomes a
 * numeric hexadecimal reference (eg. `&#xfc;`).
 *
 * Have a look at `escapeUTF8` if you want a more concise output at the expense
 * of reduced transportability.
 *
 * @param data String to escape.
 */
export const escape = encodeXML;
  58. /**
  59. * Creates a function that escapes all characters matched by the given regular
  60. * expression using the given map of characters to escape to their entities.
  61. *
  62. * @param regex Regular expression to match characters to escape.
  63. * @param map Map of characters to escape to their entities.
  64. *
  65. * @returns Function that escapes all characters matched by the given regular
  66. * expression using the given map of characters to escape to their entities.
  67. */
  68. function getEscaper(regex, map) {
  69. return function escape(data) {
  70. let match;
  71. let lastIdx = 0;
  72. let result = "";
  73. while ((match = regex.exec(data))) {
  74. if (lastIdx !== match.index) {
  75. result += data.substring(lastIdx, match.index);
  76. }
  77. // We know that this character will be in the map.
  78. result += map.get(match[0].charCodeAt(0));
  79. // Every match will be of length 1
  80. lastIdx = match.index + 1;
  81. }
  82. return result + data.substring(lastIdx);
  83. };
  84. }
/**
 * Encodes all characters not valid in XML documents using XML entities.
 *
 * Only `&`, `<`, `>`, `'` and `"` are replaced; every other character,
 * including non-ASCII ones, is copied through unchanged.
 *
 * Note that the output will be character-set dependent.
 *
 * @param data String to escape.
 */
export const escapeUTF8 = getEscaper(/[&<>'"]/g, xmlCodeMap);
  93. /**
  94. * Encodes all characters that have to be escaped in HTML attributes,
  95. * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
  96. *
  97. * @param data String to escape.
  98. */
  99. export const escapeAttribute = getEscaper(/["&\u00A0]/g, new Map([
  100. [34, "&quot;"],
  101. [38, "&amp;"],
  102. [160, "&nbsp;"],
  103. ]));
  104. /**
  105. * Encodes all characters that have to be escaped in HTML text,
  106. * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
  107. *
  108. * @param data String to escape.
  109. */
  110. export const escapeText = getEscaper(/[&<>\u00A0]/g, new Map([
  111. [38, "&amp;"],
  112. [60, "&lt;"],
  113. [62, "&gt;"],
  114. [160, "&nbsp;"],
  115. ]));
  116. //# sourceMappingURL=escape.js.map