// char-source.js 6.5 KB
/**
 * @fileoverview Utility functions to locate the source text of each code unit in the value
 * of a string literal or template token.
 * @author Francesco Trotta
 */
  5. "use strict";
  6. /**
  7. * Represents a code unit produced by the evaluation of a JavaScript common token like a string
  8. * literal or template token.
  9. */
  10. class CodeUnit {
  11. constructor(start, source) {
  12. this.start = start;
  13. this.source = source;
  14. }
  15. get end() {
  16. return this.start + this.length;
  17. }
  18. get length() {
  19. return this.source.length;
  20. }
  21. }
  22. /**
  23. * An object used to keep track of the position in a source text where the next characters will be read.
  24. */
  25. class TextReader {
  26. constructor(source) {
  27. this.source = source;
  28. this.pos = 0;
  29. }
  30. /**
  31. * Advances the reading position of the specified number of characters.
  32. * @param {number} length Number of characters to advance.
  33. * @returns {void}
  34. */
  35. advance(length) {
  36. this.pos += length;
  37. }
  38. /**
  39. * Reads characters from the source.
  40. * @param {number} [offset=0] The offset where reading starts, relative to the current position.
  41. * @param {number} [length=1] Number of characters to read.
  42. * @returns {string} A substring of source characters.
  43. */
  44. read(offset = 0, length = 1) {
  45. const start = offset + this.pos;
  46. return this.source.slice(start, start + length);
  47. }
  48. }
  49. const SIMPLE_ESCAPE_SEQUENCES =
  50. { __proto__: null, b: "\b", f: "\f", n: "\n", r: "\r", t: "\t", v: "\v" };
  51. /**
  52. * Reads a hex escape sequence.
  53. * @param {TextReader} reader The reader should be positioned on the first hexadecimal digit.
  54. * @param {number} length The number of hexadecimal digits.
  55. * @returns {string} A code unit.
  56. */
  57. function readHexSequence(reader, length) {
  58. const str = reader.read(0, length);
  59. const charCode = parseInt(str, 16);
  60. reader.advance(length);
  61. return String.fromCharCode(charCode);
  62. }
  63. /**
  64. * Reads a Unicode escape sequence.
  65. * @param {TextReader} reader The reader should be positioned after the "u".
  66. * @returns {string} A code unit.
  67. */
  68. function readUnicodeSequence(reader) {
  69. const regExp = /\{(?<hexDigits>[\dA-Fa-f]+)\}/uy;
  70. regExp.lastIndex = reader.pos;
  71. const match = regExp.exec(reader.source);
  72. if (match) {
  73. const codePoint = parseInt(match.groups.hexDigits, 16);
  74. reader.pos = regExp.lastIndex;
  75. return String.fromCodePoint(codePoint);
  76. }
  77. return readHexSequence(reader, 4);
  78. }
  79. /**
  80. * Reads an octal escape sequence.
  81. * @param {TextReader} reader The reader should be positioned after the first octal digit.
  82. * @param {number} maxLength The maximum number of octal digits.
  83. * @returns {string} A code unit.
  84. */
  85. function readOctalSequence(reader, maxLength) {
  86. const [octalStr] = reader.read(-1, maxLength).match(/^[0-7]+/u);
  87. reader.advance(octalStr.length - 1);
  88. const octal = parseInt(octalStr, 8);
  89. return String.fromCharCode(octal);
  90. }
  91. /**
  92. * Reads an escape sequence or line continuation.
  93. * @param {TextReader} reader The reader should be positioned on the backslash.
  94. * @returns {string} A string of zero, one or two code units.
  95. */
  96. function readEscapeSequenceOrLineContinuation(reader) {
  97. const char = reader.read(1);
  98. reader.advance(2);
  99. const unitChar = SIMPLE_ESCAPE_SEQUENCES[char];
  100. if (unitChar) {
  101. return unitChar;
  102. }
  103. switch (char) {
  104. case "x":
  105. return readHexSequence(reader, 2);
  106. case "u":
  107. return readUnicodeSequence(reader);
  108. case "\r":
  109. if (reader.read() === "\n") {
  110. reader.advance(1);
  111. }
  112. // fallthrough
  113. case "\n":
  114. case "\u2028":
  115. case "\u2029":
  116. return "";
  117. case "0":
  118. case "1":
  119. case "2":
  120. case "3":
  121. return readOctalSequence(reader, 3);
  122. case "4":
  123. case "5":
  124. case "6":
  125. case "7":
  126. return readOctalSequence(reader, 2);
  127. default:
  128. return char;
  129. }
  130. }
  131. /**
  132. * Reads an escape sequence or line continuation and generates the respective `CodeUnit` elements.
  133. * @param {TextReader} reader The reader should be positioned on the backslash.
  134. * @returns {Generator<CodeUnit>} Zero, one or two `CodeUnit` elements.
  135. */
  136. function *mapEscapeSequenceOrLineContinuation(reader) {
  137. const start = reader.pos;
  138. const str = readEscapeSequenceOrLineContinuation(reader);
  139. const end = reader.pos;
  140. const source = reader.source.slice(start, end);
  141. switch (str.length) {
  142. case 0:
  143. break;
  144. case 1:
  145. yield new CodeUnit(start, source);
  146. break;
  147. default:
  148. yield new CodeUnit(start, source);
  149. yield new CodeUnit(start, source);
  150. break;
  151. }
  152. }
  153. /**
  154. * Parses a string literal.
  155. * @param {string} source The string literal to parse, including the delimiting quotes.
  156. * @returns {CodeUnit[]} A list of code units produced by the string literal.
  157. */
  158. function parseStringLiteral(source) {
  159. const reader = new TextReader(source);
  160. const quote = reader.read();
  161. reader.advance(1);
  162. const codeUnits = [];
  163. for (;;) {
  164. const char = reader.read();
  165. if (char === quote) {
  166. break;
  167. }
  168. if (char === "\\") {
  169. codeUnits.push(...mapEscapeSequenceOrLineContinuation(reader));
  170. } else {
  171. codeUnits.push(new CodeUnit(reader.pos, char));
  172. reader.advance(1);
  173. }
  174. }
  175. return codeUnits;
  176. }
  177. /**
  178. * Parses a template token.
  179. * @param {string} source The template token to parse, including the delimiting sequences `` ` ``, `${` and `}`.
  180. * @returns {CodeUnit[]} A list of code units produced by the template token.
  181. */
  182. function parseTemplateToken(source) {
  183. const reader = new TextReader(source);
  184. reader.advance(1);
  185. const codeUnits = [];
  186. for (;;) {
  187. const char = reader.read();
  188. if (char === "`" || char === "$" && reader.read(1) === "{") {
  189. break;
  190. }
  191. if (char === "\\") {
  192. codeUnits.push(...mapEscapeSequenceOrLineContinuation(reader));
  193. } else {
  194. let unitSource;
  195. if (char === "\r" && reader.read(1) === "\n") {
  196. unitSource = "\r\n";
  197. } else {
  198. unitSource = char;
  199. }
  200. codeUnits.push(new CodeUnit(reader.pos, unitSource));
  201. reader.advance(unitSource.length);
  202. }
  203. }
  204. return codeUnits;
  205. }
  206. module.exports = { parseStringLiteral, parseTemplateToken };