prism-js-templates.js 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349
  1. (function (Prism) {
  // Stock template-string grammar of JavaScript (its pattern is defined in prism-javascript.js)
  // and the pieces of it that are reused further down in this file.
  var templateString = Prism.languages.javascript['template-string'];
  var interpolationObject = templateString.inside['interpolation'];
  var interpolationPunctuationObject = interpolationObject.inside['interpolation-punctuation'];

  // Regex sources, kept as strings so they can be spliced into new RegExp objects.
  var templateLiteralPattern = templateString.pattern.source;
  var interpolationPattern = interpolationObject.pattern.source;
  /**
   * Builds a grammar entry that matches a template string preceded by a special tag.
   *
   * Yields `undefined` when no grammar is registered under the given language id.
   *
   * @param {string} language The language id of the embedded language. E.g. `markdown`.
   * @param {string} tag The source of the regex pattern that matches the tag.
   * @returns {object | undefined}
   * @example
   * createTemplate('css', /\bcss/.source);
   */
  function createTemplate(language, tag) {
    var isRegistered = Boolean(Prism.languages[language]);
    if (!isRegistered) {
      return undefined;
    }

    // The tag and the whitespace after it sit in the first capture group, in front of the
    // pattern of an ordinary template literal; the entry is flagged `lookbehind` accordingly.
    var taggedLiteral = RegExp('((?:' + tag + ')\\s*)' + templateLiteralPattern);

    // Key order matters: the back-ticks are declared before the catch-all embedded code.
    var inside = {};
    inside['template-punctuation'] = { pattern: /^`|`$/, alias: 'string' };
    inside['embedded-code'] = { pattern: /[\s\S]+/, alias: language };

    return {
      pattern: taggedLiteral,
      lookbehind: true,
      greedy: true,
      inside: inside
    };
  }
  // Replace the single template-string grammar with a list: one entry per supported tag,
  // followed by the original (untagged) template string.
  Prism.languages.javascript['template-string'] = [
    // CSS
    //   styled-jsx:        css`a { color: #25F; }`
    //   styled-components: styled.h1`color: red;`
    createTemplate('css', /\b(?:styled(?:\([^)]*\))?(?:\s*\.\s*\w+(?:\([^)]*\))*)*|css(?:\s*\.\s*(?:global|resolve))?|createGlobalStyle|keyframes)/.source),

    // HTML
    //   html`<p></p>`
    //   div.innerHTML = `<p></p>`
    createTemplate('html', /\bhtml|\.\s*(?:inner|outer)HTML\s*\+?=/.source),

    // SVG
    //   svg`<path fill="#fff" d="M55.37 ..."/>`
    createTemplate('svg', /\bsvg/.source),

    // Markdown
    //   md`# h1`, markdown`## h2`
    createTemplate('markdown', /\b(?:markdown|md)/.source),

    // GraphQL
    //   gql`...`, graphql`...`, graphql.experimental`...`
    createTemplate('graphql', /\b(?:gql|graphql(?:\s*\.\s*experimental)?)/.source),

    // SQL
    //   sql`...`
    createTemplate('sql', /\bsql/.source),

    // vanilla template string
    templateString
  ].filter(function (entry) {
    // createTemplate returns `undefined` for languages without a registered grammar
    return Boolean(entry);
  });
  /**
   * Returns a specific placeholder literal for the given language.
   *
   * The placeholder consists of word characters only (`[A-Za-z0-9_]`) so that it reads as a
   * single identifier: every other character of the language id (e.g. the `-` in
   * `shell-session`) is replaced with `_`. Ids made of word characters only produce exactly
   * `___<ID IN UPPER CASE>_<counter>___`.
   *
   * @param {number} counter
   * @param {string} language
   * @returns {string}
   */
  function getPlaceholder(counter, language) {
    var languageTag = language.toUpperCase().replace(/\W/g, '_');
    return '___' + languageTag + '_' + counter + '___';
  }
  /**
   * Like `Prism.tokenize`, except that the `before-tokenize` and `after-tokenize` hooks are run
   * around the tokenization as well.
   *
   * @param {string} code
   * @param {any} grammar
   * @param {string} language
   * @returns {(string|Token)[]}
   */
  function tokenizeWithHooks(code, grammar, language) {
    var env = {};
    env.code = code;
    env.grammar = grammar;
    env.language = language;

    Prism.hooks.run('before-tokenize', env);
    // Code and grammar are read back from `env`, not from the parameters, so whatever the
    // `before-tokenize` hooks stored there is what gets tokenized.
    env.tokens = Prism.tokenize(env.code, env.grammar);
    Prism.hooks.run('after-tokenize', env);

    return env.tokens;
  }
  /**
   * Turns the source of one JavaScript interpolation expression into an `interpolation` token.
   *
   * @param {string} expression The code of the expression. E.g. `"${42}"`
   * @returns {Token}
   */
  function tokenizeInterpolationExpression(expression) {
    // First pass: a grammar that only knows the interpolation punctuation.
    var punctuationOnly = { 'interpolation-punctuation': interpolationPunctuationObject };
    /** @type {Array} */
    var parts = Prism.tokenize(expression, punctuationOnly);

    /*
     * With exactly three parts the array looks like this:
     * [
     *     ["interpolation-punctuation", "${"]
     *     "..." // JavaScript expression of the interpolation
     *     ["interpolation-punctuation", "}"]
     * ]
     * and the middle string is swapped for its JavaScript tokens.
     */
    if (parts.length === 3) {
      var jsTokens = tokenizeWithHooks(parts[1], Prism.languages.javascript, 'javascript');
      Array.prototype.splice.apply(parts, [1, 1].concat(jsTokens));
    }

    return new Prism.Token('interpolation', parts, interpolationObject.alias, expression);
  }
  /**
   * Tokenizes code of an embedded language that may have JavaScript interpolation expressions
   * mixed in.
   *
   * The work is done in 3 phases:
   *
   * 1. Swap every JavaScript interpolation expression for a placeholder.
   *    The placeholder is meant to have the syntax of an identifier of the target language.
   * 2. Tokenize the code that now contains the placeholders.
   * 3. Tokenize the interpolation expressions and put them back into the tokenized code.
   *    Re-insertion only works while a placeholder has not been "ripped apart", i.e. tokenized
   *    as two tokens by the grammar of the embedded language.
   *
   * @param {string} code
   * @param {object} grammar
   * @param {string} language
   * @returns {Token}
   */
  function tokenizeEmbedded(code, grammar, language) {
    // Phase 1: find the interpolations.
    // They might be escaped, which needs a lookbehind, so Prism itself does the matching.
    var interpolationOnly = {
      'interpolation': {
        pattern: RegExp(interpolationPattern),
        lookbehind: true
      }
    };
    /** @type {(Token|string)[]} */
    var pieces = Prism.tokenize(code, interpolationOnly);

    // Swap each interpolation for a placeholder that does not occur in the code already.
    var placeholderCounter = 0;
    /** @type {Object<string, string>} */
    var placeholderMap = {};
    var embeddedCode = '';
    for (var k = 0; k < pieces.length; k++) {
      var piece = pieces[k];
      if (typeof piece === 'string') {
        embeddedCode += piece;
        continue;
      }

      var freshPlaceholder;
      do {
        freshPlaceholder = getPlaceholder(placeholderCounter++, language);
      } while (code.indexOf(freshPlaceholder) !== -1);

      placeholderMap[freshPlaceholder] = piece.content;
      embeddedCode += freshPlaceholder;
    }

    // Phase 2: tokenize the embedded code.
    var embeddedTokens = tokenizeWithHooks(embeddedCode, grammar, language);

    // Phase 3: re-insert the interpolations, in the order in which they were replaced.
    var placeholders = Object.keys(placeholderMap);
    placeholderCounter = 0; // from here on: index of the next placeholder to look for

    /**
     * Searches the token stream (depth first) for the pending placeholders and replaces each
     * one found with the tokenized interpolation expression. The stream is modified in place.
     *
     * @param {(Token|string)[]} tokens
     * @returns {void}
     */
    function walkTokens(tokens) {
      for (var i = 0; i < tokens.length; i++) {
        if (placeholderCounter >= placeholders.length) {
          // nothing left to insert
          return;
        }

        var token = tokens[i];
        var isPlainString = typeof token === 'string';

        if (!isPlainString && typeof token.content !== 'string') {
          // Token with nested content: descend.
          var nested = token.content;
          walkTokens(Array.isArray(nested) ? nested : [nested]);
          continue;
        }

        var text = isPlainString ? token : /** @type {string} */ (token.content);
        var wanted = placeholders[placeholderCounter];
        var at = text.indexOf(wanted);
        if (at === -1) {
          continue;
        }

        // Advance before recursing so the rest of the text is searched for the next placeholder.
        ++placeholderCounter;

        var replacement = [];
        var head = text.substring(0, at);
        if (head) {
          replacement.push(head);
        }

        replacement.push(tokenizeInterpolationExpression(placeholderMap[wanted]));

        var tail = text.substring(at + wanted.length);
        if (tail) {
          // the remainder may hold further placeholders
          var tailTokens = [tail];
          walkTokens(tailTokens);
          replacement.push.apply(replacement, tailTokens);
        }

        if (isPlainString) {
          tokens.splice.apply(tokens, [i, 1].concat(replacement));
          // skip over what was just inserted
          i += replacement.length - 1;
        } else {
          token.content = replacement;
        }
      }
    }

    walkTokens(embeddedTokens);

    return new Prism.Token(language, embeddedTokens, 'language-' + language, code);
  }
  /**
   * The languages for which JS templating will handle tagged template literals.
   *
   * JS templating is active not only for JavaScript but also for related languages like
   * TypeScript, JSX, and TSX.
   */
  var supportedLanguages = {
    'javascript': true,
    'js': true,
    'typescript': true,
    'ts': true,
    'jsx': true,
    'tsx': true
  };

  // After a supported language has been tokenized, look for template strings that carry
  // embedded code and tokenize that code with the grammar of its own language.
  Prism.hooks.add('after-tokenize', function (env) {
    // Own-property check: the `in` operator would also accept inherited keys such as
    // "constructor" or "toString" as a language id.
    if (!Object.prototype.hasOwnProperty.call(supportedLanguages, env.language)) {
      return;
    }

    /**
     * Finds and tokenizes all template strings with an embedded language.
     * The token stream is modified in place.
     *
     * @param {(Token | string)[]} tokens
     * @returns {void}
     */
    function findTemplateStrings(tokens) {
      for (var i = 0, l = tokens.length; i < l; i++) {
        var token = tokens[i];
        if (typeof token === 'string') {
          continue;
        }

        var content = token.content;
        if (!Array.isArray(content)) {
          // A single nested token is searched as well; plain string content has nothing to find.
          if (typeof content !== 'string') {
            findTemplateStrings([content]);
          }
          continue;
        }

        if (token.type !== 'template-string') {
          findTemplateStrings(content);
          continue;
        }

        /**
         * A JavaScript template-string token will look like this:
         *
         * ["template-string", [
         *     ["template-punctuation", "`"],
         *     (
         *         An array of "string" and "interpolation" tokens. This is the simple string case.
         *         or
         *         ["embedded-code", "..."] This is the token containing the embedded code.
         *                                  It also has an alias which is the language of the embedded code.
         *     ),
         *     ["template-punctuation", "`"]
         * ]]
         */
        var embedded = content[1];
        if (content.length === 3 && typeof embedded !== 'string' && embedded.type === 'embedded-code') {
          // get string content
          var code = stringContent(embedded);

          var alias = embedded.alias;
          var language = Array.isArray(alias) ? alias[0] : alias;

          var grammar = Prism.languages[language];
          if (!grammar) {
            // the embedded language isn't registered.
            continue;
          }

          content[1] = tokenizeEmbedded(code, grammar, language);
        }
      }
    }

    findTemplateStrings(env.tokens);
  });
  /**
   * Returns the string content of a token or token stream.
   *
   * Strings are returned as they are, arrays are flattened by concatenating the string content
   * of their items, and for anything else the `content` property is followed recursively.
   * `null` and `undefined` (e.g. a token without content) contribute the empty string instead
   * of raising a TypeError.
   *
   * @param {string | Token | (string | Token)[] | null | undefined} value
   * @returns {string}
   */
  function stringContent(value) {
    if (value === null || value === undefined) {
      return '';
    }
    if (typeof value === 'string') {
      return value;
    }
    if (Array.isArray(value)) {
      // wrapped in a function so that map's index/array arguments are not passed along
      return value.map(function (item) {
        return stringContent(item);
      }).join('');
    }
    return stringContent(value.content);
  }
  297. }(Prism));