regjsgen.js 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425
  1. /*!
  2. * regjsgen 0.8.0
  3. * Copyright 2014-2023 Benjamin Tan <https://ofcr.se/>
  4. * Available under the MIT license <https://github.com/bnjmnt4n/regjsgen/blob/main/LICENSE-MIT.txt>
  5. */
  6. ;(function() {
  7. 'use strict';
  // Lookup table of `typeof` results that denote the language type `Object`.
  var objectTypes = {
    'function': true,
    'object': true
  };

  // Reference to the global object: `window` when it is available and truthy,
  // otherwise the `this` value of the enclosing scope.
  var root = this;
  if (objectTypes[typeof window] && window) {
    root = window;
  }

  // Free variable `exports`, kept only when it has no truthy `nodeType`.
  var freeExports = objectTypes[typeof exports] && exports && !exports.nodeType && exports;

  // Truthy when a free variable `module` exists and has no truthy `nodeType`.
  var hasFreeModule = objectTypes[typeof module] && module && !module.nodeType;

  // Free variable `global` from Node.js or Browserified code. It replaces
  // `root` only when one of its `global`, `window` or `self` properties
  // points back at itself.
  var freeGlobal = freeExports && hasFreeModule && typeof global == 'object' && global;
  if (freeGlobal) {
    if (
      freeGlobal.global === freeGlobal ||
      freeGlobal.window === freeGlobal ||
      freeGlobal.self === freeGlobal
    ) {
      root = freeGlobal;
    }
  }

  // Cached so own-property checks do not rely on the inspected object's
  // own `hasOwnProperty` member.
  var hasOwnProperty = Object.prototype.hasOwnProperty;
  26. /*--------------------------------------------------------------------------*/
  // Converts a single Unicode code point into its string form.
  // Based on https://mths.be/fromcodepoint by @mathias.
  //
  // The first argument is coerced with `Number`. A `RangeError` is thrown
  // unless the result is a finite integer in the range 0..0x10FFFF.
  // Code points above 0xFFFF are returned as a surrogate pair.
  function fromCodePoint() {
    var codePoint = Number(arguments[0]);
    var isValid =
      isFinite(codePoint) &&                // rules out `NaN` and both infinities
      codePoint >= 0 &&                     // lower bound of Unicode
      codePoint <= 0x10FFFF &&              // upper bound of Unicode
      Math.floor(codePoint) == codePoint;   // must be an integer

    if (!isValid) {
      throw RangeError('Invalid code point: ' + codePoint);
    }

    // BMP code points map to a single UTF-16 code unit.
    if (codePoint <= 0xFFFF) {
      return String.fromCharCode(codePoint);
    }

    // Astral code point; split into surrogate halves.
    // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
    var offset = codePoint - 0x10000;
    var highSurrogate = (offset >> 10) + 0xD800;
    var lowSurrogate = (offset % 0x400) + 0xDC00;
    return String.fromCharCode(highSurrogate, lowSurrogate);
  }
  51. /*--------------------------------------------------------------------------*/
  // Cache of compiled patterns, keyed by the `|`-separated list of accepted
  // types they were built from.
  var assertTypeRegexMap = {};

  // Ensures that a node has one of the expected types.
  //
  // `expected` is either a single type name or several names joined by `|`.
  // Returns `undefined` when `type` is acceptable and throws an `Error`
  // otherwise. For multi-type lists the error message shows the compiled
  // pattern rather than the original list.
  function assertType(type, expected) {
    var isSingleType = expected.indexOf('|') == -1;
    if (isSingleType) {
      if (type != expected) {
        throw Error('Invalid node type: ' + type + '; expected type: ' + expected);
      }
      return;
    }

    // Compile each distinct list of alternatives only once.
    var pattern;
    if (hasOwnProperty.call(assertTypeRegexMap, expected)) {
      pattern = assertTypeRegexMap[expected];
    } else {
      pattern = assertTypeRegexMap[expected] = RegExp('^(?:' + expected + ')$');
    }

    if (!pattern.test(type)) {
      throw Error('Invalid node type: ' + type + '; expected types: ' + pattern);
    }
  }
  69. /*--------------------------------------------------------------------------*/
  // Generates a regular expression string based on an AST.
  //
  // Looks up the generator registered for `node.type` in `generators` and
  // returns its result. Throws an `Error` when no generator is registered
  // under that type as an own property.
  function generate(node) {
    var type = node.type;

    if (!hasOwnProperty.call(generators, type)) {
      throw Error('Invalid node type: ' + type);
    }

    return generators[type](node);
  }
  // Constructs a string by concatenating the output of each term.
  //
  // `generator` is called once per element of `terms`. When `separator` is
  // truthy it is inserted between consecutive elements.
  function generateSequence(generator, terms, /* optional */ separator) {
    var output = '';

    for (var index = 0, count = terms.length; index < count; index++) {
      var current = terms[index];

      if (separator && index > 0) {
        output += separator;
      }

      // Ensure that `\0` null escapes followed by number symbols are not
      // treated as backreferences: emit the three-digit form `\000` instead
      // of calling the generator for the null escape.
      if (index + 1 < count && current.type == 'value' && current.kind == 'null') {
        var following = terms[index + 1];
        if (
          following.type == 'value' &&
          following.kind == 'symbol' &&
          following.codePoint >= 48 && // '0'
          following.codePoint <= 57    // '9'
        ) {
          output += '\\000';
          continue;
        }
      }

      output += generator(current);
    }

    return output;
  }
  105. /*--------------------------------------------------------------------------*/
  // Generates the string for an `alternative` node by concatenating the
  // output of `generateTerm` for every entry of `node.body`.
  function generateAlternative(node) {
    assertType(node.type, 'alternative');

    var terms = node.body;
    return generateSequence(generateTerm, terms);
  }
  // Generates the string for an `anchor` node.
  //
  // `node.kind` selects the output: `start` -> `^`, `end` -> `$`,
  // `boundary` -> `\b`, `not-boundary` -> `\B`. Any other kind throws.
  function generateAnchor(node) {
    assertType(node.type, 'anchor');

    var kind = node.kind;
    if (kind === 'start') {
      return '^';
    }
    if (kind === 'end') {
      return '$';
    }
    if (kind === 'boundary') {
      return '\\b';
    }
    if (kind === 'not-boundary') {
      return '\\B';
    }
    throw Error('Invalid assertion');
  }
  // `|`-separated list of the node types accepted as an atom.
  var atomType = [
    'anchor',
    'characterClass',
    'characterClassEscape',
    'dot',
    'group',
    'reference',
    'unicodePropertyEscape',
    'value'
  ].join('|');

  // Generates the string for an atom: checks that the node type is one of
  // the types listed in `atomType`, then dispatches through `generate`.
  function generateAtom(node) {
    var type = node.type;
    assertType(type, atomType);

    return generate(node);
  }
  // Generates the string for a `characterClass` node: `[`, an optional `^`
  // when `node.negative` is truthy, the class atoms, and `]`.
  //
  // Atoms are joined with `&&` for kind `intersection`, with `--` for kind
  // `subtraction`, and with nothing for any other kind.
  function generateCharacterClass(node) {
    assertType(node.type, 'characterClass');

    var kind = node.kind;
    var separator = '';
    if (kind === 'intersection') {
      separator = '&&';
    } else if (kind === 'subtraction') {
      separator = '--';
    }

    var opening = node.negative ? '[^' : '[';
    return opening + generateSequence(generateClassAtom, node.body, separator) + ']';
  }
  // Generates the string for a `characterClassEscape` node: a backslash
  // followed by `node.value`.
  function generateCharacterClassEscape(node) {
    assertType(node.type, 'characterClassEscape');

    var escapeName = node.value;
    return '\\' + escapeName;
  }
  // Generates the string for a `characterClassRange` node as
  // `<min>-<max>`, where both ends are produced by `generateClassAtom`.
  // Throws when either end is itself a `characterClassRange`.
  function generateCharacterClassRange(node) {
    assertType(node.type, 'characterClassRange');

    var lower = node.min;
    var upper = node.max;

    var hasNestedRange =
      lower.type == 'characterClassRange' ||
      upper.type == 'characterClassRange';
    if (hasNestedRange) {
      throw Error('Invalid character class range');
    }

    return generateClassAtom(lower) + '-' + generateClassAtom(upper);
  }
  // Generates the string for a node appearing inside a character class:
  // checks the node type against the list of types accepted there, then
  // dispatches through `generate`.
  function generateClassAtom(node) {
    var allowedTypes = 'anchor|characterClass|characterClassEscape|characterClassRange|dot|value|unicodePropertyEscape|classStrings';
    assertType(node.type, allowedTypes);

    return generate(node);
  }
  // Generates the string for a `classStrings` node: `\q{...}` wrapping the
  // entries of `node.strings`, each produced by `generateClassString` and
  // separated by `|`.
  function generateClassStrings(node) {
    assertType(node.type, 'classStrings');

    var alternatives = generateSequence(generateClassString, node.strings, '|');
    return '\\q{' + alternatives + '}';
  }
  // Generates the string for a `classString` node by concatenating the
  // output of `generate` for every entry of `node.characters`.
  function generateClassString(node) {
    assertType(node.type, 'classString');

    var characters = node.characters;
    return generateSequence(generate, characters);
  }
  // Generates the string for a `disjunction` node: the output of `generate`
  // for every entry of `node.body`, separated by `|`.
  function generateDisjunction(node) {
    assertType(node.type, 'disjunction');

    var alternatives = node.body;
    return generateSequence(generate, alternatives, '|');
  }
  // Generates the string for a `dot` node, which is always `.`.
  function generateDot(node) {
    var type = node.type;
    assertType(type, 'dot');

    return '.';
  }
  // Generates the string for a `group` node: `(`, a prefix chosen by
  // `node.behavior`, the generated body, and `)`.
  //
  // Prefix per behavior:
  //   `normal`             -> `?<name>` when `node.name` is set, else none
  //   `ignore`             -> `?:`, or `?<enabling>-<disabling>:` when
  //                           `node.modifierFlags` is set
  //   `lookahead`          -> `?=`
  //   `negativeLookahead`  -> `?!`
  //   `lookbehind`         -> `?<=`
  //   `negativeLookbehind` -> `?<!`
  //
  // Throws an `Error` naming the offending value for any other behavior.
  function generateGroup(node) {
    assertType(node.type, 'group');

    var result = '';

    switch (node.behavior) {
      case 'normal':
        if (node.name) {
          result += '?<' + generateIdentifier(node.name) + '>';
        }
        break;
      case 'ignore':
        if (node.modifierFlags) {
          result += '?';
          if (node.modifierFlags.enabling) result += node.modifierFlags.enabling;
          if (node.modifierFlags.disabling) result += "-" + node.modifierFlags.disabling;
          result += ':';
        } else {
          result += '?:';
        }
        break;
      case 'lookahead':
        result += '?=';
        break;
      case 'negativeLookahead':
        result += '?!';
        break;
      case 'lookbehind':
        result += '?<=';
        break;
      case 'negativeLookbehind':
        result += '?<!';
        break;
      default:
        // Read the same property the switch dispatched on (`behavior`), so
        // the message reports the actual unsupported value.
        throw Error('Invalid behaviour: ' + node.behavior);
    }

    result += generateSequence(generate, node.body);

    return '(' + result + ')';
  }
  // Generates the string for an `identifier` node, which is `node.value`
  // returned unchanged.
  function generateIdentifier(node) {
    assertType(node.type, 'identifier');

    var name = node.value;
    return name;
  }
  // Generates the string for a `quantifier` node: the quantified atom
  // (`node.body[0]`) followed by the shortest matching quantifier syntax.
  //
  //   no max:  min 0 -> `*`, min 1 -> `+`, otherwise `{min,}`
  //   min equals max -> `{min}`
  //   0..1           -> `?`
  //   otherwise      -> `{min,max}`
  //
  // A trailing `?` is appended when `node.greedy` is falsy.
  function generateQuantifier(node) {
    assertType(node.type, 'quantifier');

    var min = node.min;
    var max = node.max;
    var quantifier;

    if (max == null) {
      // Unbounded repetition.
      quantifier = min == 0 ? '*' : (min == 1 ? '+' : '{' + min + ',}');
    } else if (min == max) {
      quantifier = '{' + min + '}';
    } else if (min == 0 && max == 1) {
      quantifier = '?';
    } else {
      quantifier = '{' + min + ',' + max + '}';
    }

    var lazyMarker = node.greedy ? '' : '?';

    return generateAtom(node.body[0]) + (quantifier + lazyMarker);
  }
  // Generates the string for a `reference` node.
  //
  // A truthy `node.matchIndex` yields a numbered backreference (`\N`).
  // Otherwise a truthy `node.name` yields a named backreference
  // (`\k<name>`). A node with neither throws.
  function generateReference(node) {
    assertType(node.type, 'reference');

    var index = node.matchIndex;
    if (index) {
      return '\\' + index;
    }

    var name = node.name;
    if (name) {
      return '\\k<' + generateIdentifier(name) + '>';
    }

    throw new Error('Unknown reference type');
  }
  // Generates the string for a term: checks that the node type is an atom
  // type, `empty` or `quantifier`, then dispatches through `generate`.
  function generateTerm(node) {
    var allowedTypes = atomType + '|empty|quantifier';
    assertType(node.type, allowedTypes);

    return generate(node);
  }
  // Generates the string for a `unicodePropertyEscape` node:
  // `\p{value}`, or `\P{value}` when `node.negative` is truthy.
  function generateUnicodePropertyEscape(node) {
    assertType(node.type, 'unicodePropertyEscape');

    var letter = node.negative ? 'P' : 'p';
    return '\\' + letter + '{' + node.value + '}';
  }
  // Maps the code point of a `singleEscape` value to its escape sequence.
  // Throws for any code point without a single-character escape.
  function generateSingleEscape(codePoint) {
    switch (codePoint) {
      case 0x0008: return '\\b';
      case 0x0009: return '\\t';
      case 0x000A: return '\\n';
      case 0x000B: return '\\v';
      case 0x000C: return '\\f';
      case 0x000D: return '\\r';
      case 0x002D: return '\\-';
    }
    throw Error('Invalid code point: ' + codePoint);
  }

  // Generates the string for a `value` node.
  //
  // `node.codePoint` must be a number, otherwise an `Error` is thrown.
  // `node.kind` selects how the code point is written:
  //   `controlLetter`          -> `\c` + the character at code point + 64
  //   `hexadecimalEscape`      -> `\xHH` (upper-case, padded to 2 digits)
  //   `identifier`             -> backslash + the character itself
  //   `null`                   -> backslash + the code point number
  //   `octal`                  -> backslash + octal digits padded to 3
  //   `singleEscape`           -> see `generateSingleEscape`
  //   `symbol`                 -> the character itself
  //   `unicodeEscape`          -> `\uHHHH` (upper-case, padded to 4 digits)
  //   `unicodeCodePointEscape` -> `\u{H...}` (upper-case, unpadded)
  // Any other kind throws.
  function generateValue(node) {
    assertType(node.type, 'value');

    var kind = node.kind;
    var codePoint = node.codePoint;

    if (typeof codePoint != 'number') {
      throw new Error('Invalid code point: ' + codePoint);
    }

    if (kind === 'controlLetter') {
      return '\\c' + fromCodePoint(codePoint + 64);
    }
    if (kind === 'hexadecimalEscape') {
      return '\\x' + ('00' + codePoint.toString(16).toUpperCase()).slice(-2);
    }
    if (kind === 'identifier') {
      return '\\' + fromCodePoint(codePoint);
    }
    if (kind === 'null') {
      return '\\' + codePoint;
    }
    if (kind === 'octal') {
      return '\\' + ('000' + codePoint.toString(8)).slice(-3);
    }
    if (kind === 'singleEscape') {
      return generateSingleEscape(codePoint);
    }
    if (kind === 'symbol') {
      return fromCodePoint(codePoint);
    }
    if (kind === 'unicodeEscape') {
      return '\\u' + ('0000' + codePoint.toString(16).toUpperCase()).slice(-4);
    }
    if (kind === 'unicodeCodePointEscape') {
      return '\\u{' + codePoint.toString(16).toUpperCase() + '}';
    }

    throw Error('Unsupported node kind: ' + kind);
  }
  303. /*--------------------------------------------------------------------------*/
  // Dispatch table used by `generate`: maps each supported node type to the
  // function that produces its string.
  var generators = {
    alternative: generateAlternative,
    anchor: generateAnchor,
    characterClass: generateCharacterClass,
    characterClassEscape: generateCharacterClassEscape,
    characterClassRange: generateCharacterClassRange,
    classStrings: generateClassStrings,
    disjunction: generateDisjunction,
    dot: generateDot,
    group: generateGroup,
    quantifier: generateQuantifier,
    reference: generateReference,
    unicodePropertyEscape: generateUnicodePropertyEscape,
    value: generateValue
  };
  320. /*--------------------------------------------------------------------------*/
  // Public API object: exposes `generate` only.
  var regjsgen = {};
  regjsgen.generate = generate;

  // Some AMD build optimizers, like r.js, check for condition patterns like
  // the following, so the condition is kept in this exact form.
  if (typeof define == 'function' && typeof define.amd == 'object' && define.amd) {
    // Define as an anonymous module so it can be aliased through path mapping.
    define(function() {
      return regjsgen;
    });
    // Also publish on the global object.
    root.regjsgen = regjsgen;
  }
  // Check for `exports` after `define` in case a build optimizer adds an
  // `exports` object.
  else if (freeExports && hasFreeModule) {
    // CommonJS: attach `generate` directly to the `exports` object.
    freeExports.generate = generate;
  }
  else {
    // No module system detected: export to the global object.
    root.regjsgen = regjsgen;
  }
  342. }.call(this));