// prism-markdown.js (10 KB)
  1. (function (Prism) {
  // Source of a pattern for one "inner" character of an inline construct:
  // an escaped character, any character other than a backslash or line break,
  // or a single line break that is not directly followed by another one
  // (i.e. allow only one line break).
  var inner = /(?:\\.|[^\\\n\r]|(?:\n|\r\n?)(?![\r\n]))/.source;

  /**
   * This function is intended for the creation of the bold or italic pattern.
   *
   * Every `<inner>` placeholder in the given pattern source is replaced by the source of `inner`.
   *
   * This also adds a lookbehind group to the given pattern to ensure that the pattern is not backslash-escaped.
   *
   * _Note:_ Keep in mind that this adds a capturing group.
   *
   * @param {string} pattern The regex source, possibly containing `<inner>` placeholders.
   * @returns {RegExp}
   */
  function createInline(pattern) {
    // A replacer function is used so that nothing in `inner` is interpreted as a `$` replacement pattern.
    pattern = pattern.replace(/<inner>/g, function () { return inner; });
    return RegExp(/((?:^|[^\\])(?:\\{2})*)/.source + '(?:' + pattern + ')');
  }
  // Source of a pattern for the content of one table cell: escaped characters,
  // inline code delimited by single or double backticks, or any character that
  // is not a backslash, pipe, line break, or backtick.
  var tableCell = /(?:\\.|``(?:[^`\r\n]|`(?!`))+``|`[^`\r\n]+`|[^\\|\r\n`])+/.source;
  // Source of a pattern for one table row: at least two cells separated by
  // pipes, terminated by a line break or the end of the input.
  // `__` is a placeholder for the cell pattern.
  var tableRow = /\|?__(?:\|__)+\|?(?:(?:\n|\r\n?)|(?![\s\S]))/.source.replace(/__/g, function () { return tableCell; });
  // Source of a pattern for the delimiter line below the header row, e.g. `| --- | :---: |`.
  var tableLine = /\|?[ \t]*:?-{3,}:?[ \t]*(?:\|[ \t]*:?-{3,}:?[ \t]*)+\|?(?:\n|\r\n?)/.source;
  // The Markdown grammar extends the `markup` grammar; the Markdown-specific
  // tokens below are inserted before the `prolog` token.
  Prism.languages.markdown = Prism.languages.extend('markup', {});
  Prism.languages.insertBefore('markdown', 'prolog', {
    'front-matter-block': {
      pattern: /(^(?:\s*[\r\n])?)---(?!.)[\s\S]*?[\r\n]---(?!.)/,
      lookbehind: true,
      greedy: true,
      inside: {
        'punctuation': /^---|---$/,
        'front-matter': {
          pattern: /\S+(?:\s+\S+)*/,
          alias: ['yaml', 'language-yaml'],
          inside: Prism.languages.yaml
        }
      }
    },
    'blockquote': {
      // > ...
      pattern: /^>(?:[\t ]*>)*/m,
      alias: 'punctuation'
    },
    'table': {
      // header row, delimiter line, then any number of data rows
      pattern: RegExp('^' + tableRow + tableLine + '(?:' + tableRow + ')*', 'm'),
      inside: {
        'table-data-rows': {
          pattern: RegExp('^(' + tableRow + tableLine + ')(?:' + tableRow + ')*$'),
          lookbehind: true,
          inside: {
            'table-data': {
              pattern: RegExp(tableCell),
              inside: Prism.languages.markdown
            },
            'punctuation': /\|/
          }
        },
        'table-line': {
          pattern: RegExp('^(' + tableRow + ')' + tableLine + '$'),
          lookbehind: true,
          inside: {
            'punctuation': /\||:?-{3,}:?/
          }
        },
        'table-header-row': {
          pattern: RegExp('^' + tableRow + '$'),
          inside: {
            'table-header': {
              pattern: RegExp(tableCell),
              alias: 'important',
              inside: Prism.languages.markdown
            },
            'punctuation': /\|/
          }
        }
      }
    },
    'code': [
      {
        // Prefixed by 4 spaces or 1 tab and preceded by an empty line
        pattern: /((?:^|\n)[ \t]*\n|(?:^|\r\n?)[ \t]*\r\n?)(?: {4}|\t).+(?:(?:\n|\r\n?)(?: {4}|\t).+)*/,
        lookbehind: true,
        alias: 'keyword'
      },
      {
        // ```optional language
        // code block
        // ```
        pattern: /^```[\s\S]*?^```$/m,
        greedy: true,
        inside: {
          'code-block': {
            pattern: /^(```.*(?:\n|\r\n?))[\s\S]+?(?=(?:\n|\r\n?)^```$)/m,
            lookbehind: true
          },
          'code-language': {
            pattern: /^(```).+/,
            lookbehind: true
          },
          'punctuation': /```/
        }
      }
    ],
    'title': [
      {
        // title 1
        // =======

        // title 2
        // -------
        pattern: /\S.*(?:\n|\r\n?)(?:==+|--+)(?=[ \t]*$)/m,
        alias: 'important',
        inside: {
          punctuation: /==+$|--+$/
        }
      },
      {
        // # title 1
        // ###### title 6
        pattern: /(^\s*)#.+/m,
        lookbehind: true,
        alias: 'important',
        inside: {
          punctuation: /^#+|#+$/
        }
      }
    ],
    'hr': {
      // ***
      // ---
      // * * *
      // -----------
      pattern: /(^\s*)([*-])(?:[\t ]*\2){2,}(?=\s*$)/m,
      lookbehind: true,
      alias: 'punctuation'
    },
    'list': {
      // * item
      // + item
      // - item
      // 1. item
      pattern: /(^\s*)(?:[*+-]|\d+\.)(?=[\t ].)/m,
      lookbehind: true,
      alias: 'punctuation'
    },
    'url-reference': {
      // [id]: http://example.com "Optional title"
      // [id]: http://example.com 'Optional title'
      // [id]: http://example.com (Optional title)
      // [id]: <http://example.com> "Optional title"
      pattern: /!?\[[^\]]+\]:[\t ]+(?:\S+|<(?:\\.|[^>\\])+>)(?:[\t ]+(?:"(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*'|\((?:\\.|[^)\\])*\)))?/,
      inside: {
        'variable': {
          pattern: /^(!?\[)[^\]]+/,
          lookbehind: true
        },
        'string': /(?:"(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*'|\((?:\\.|[^)\\])*\))$/,
        'punctuation': /^[\[\]!:]|[<>]/
      },
      alias: 'url'
    },
    'bold': {
      // **strong**
      // __strong__

      // allow one nested instance of italic text using the same delimiter
      pattern: createInline(/\b__(?:(?!_)<inner>|_(?:(?!_)<inner>)+_)+__\b|\*\*(?:(?!\*)<inner>|\*(?:(?!\*)<inner>)+\*)+\*\*/.source),
      lookbehind: true,
      greedy: true,
      inside: {
        'content': {
          pattern: /(^..)[\s\S]+(?=..$)/,
          lookbehind: true,
          inside: {} // filled in after the grammar has been created
        },
        'punctuation': /\*\*|__/
      }
    },
    'italic': {
      // *em*
      // _em_

      // allow one nested instance of bold text using the same delimiter
      pattern: createInline(/\b_(?:(?!_)<inner>|__(?:(?!_)<inner>)+__)+_\b|\*(?:(?!\*)<inner>|\*\*(?:(?!\*)<inner>)+\*\*)+\*/.source),
      lookbehind: true,
      greedy: true,
      inside: {
        'content': {
          pattern: /(^.)[\s\S]+(?=.$)/,
          lookbehind: true,
          inside: {} // filled in after the grammar has been created
        },
        'punctuation': /[*_]/
      }
    },
    'strike': {
      // ~~strike through~~
      // ~strike~
      // eslint-disable-next-line regexp/strict
      pattern: createInline(/(~~?)(?:(?!~)<inner>)+\2/.source),
      lookbehind: true,
      greedy: true,
      inside: {
        'content': {
          pattern: /(^~~?)[\s\S]+(?=\1$)/,
          lookbehind: true,
          inside: {} // filled in after the grammar has been created
        },
        'punctuation': /~~?/
      }
    },
    'code-snippet': {
      // `code`
      // ``code``
      pattern: /(^|[^\\`])(?:``[^`\r\n]+(?:`[^`\r\n]+)*``(?!`)|`[^`\r\n]+`(?!`))/,
      lookbehind: true,
      greedy: true,
      alias: ['code', 'keyword']
    },
    'url': {
      // [example](http://example.com "Optional title")
      // [example][id]
      // [example] [id]
      pattern: createInline(/!?\[(?:(?!\])<inner>)+\](?:\([^\s)]+(?:[\t ]+"(?:\\.|[^"\\])*")?\)|[ \t]?\[(?:(?!\])<inner>)+\])/.source),
      lookbehind: true,
      greedy: true,
      inside: {
        'operator': /^!/,
        'content': {
          pattern: /(^\[)[^\]]+(?=\])/,
          lookbehind: true,
          inside: {} // filled in after the grammar has been created
        },
        'variable': {
          pattern: /(^\][ \t]?\[)[^\]]+(?=\]$)/,
          lookbehind: true
        },
        'url': {
          pattern: /(^\]\()[^\s)]+/,
          lookbehind: true
        },
        'string': {
          pattern: /(^[ \t]+)"(?:\\.|[^"\\])*"(?=\)$)/,
          lookbehind: true
        }
      }
    }
  });
  // Fill the (initially empty) `inside` of each inline token's `content` with
  // the other inline tokens, so that e.g. bold text may contain italic text, a
  // link, strike-through text, or a code snippet. A token is never added to
  // its own content.
  ['url', 'bold', 'italic', 'strike'].forEach(function (token) {
    ['url', 'bold', 'italic', 'strike', 'code-snippet'].forEach(function (inside) {
      if (token !== inside) {
        Prism.languages.markdown[token].inside.content.inside[inside] = Prism.languages.markdown[inside];
      }
    });
  });
  // After Markdown has been tokenized, tag the `code-block` token of every
  // fenced code block with a `language-xxxx` alias derived from its
  // `code-language` token.
  Prism.hooks.add('after-tokenize', function (env) {
    if (env.language !== 'markdown' && env.language !== 'md') {
      return;
    }

    /**
     * Recursively visits the given token stream and adds the language alias to
     * the code block of each `code` token.
     *
     * @param {*} tokens A token's content; strings and falsy values are ignored.
     */
    function walkTokens(tokens) {
      if (!tokens || typeof tokens === 'string') {
        return;
      }

      for (var i = 0, l = tokens.length; i < l; i++) {
        var token = tokens[i];

        if (token.type !== 'code') {
          walkTokens(token.content);
          continue;
        }

        /*
         * Add the correct `language-xxxx` class to this code block. Keep in mind that the `code-language` token
         * is optional. But the grammar is defined so that there is only one case we have to handle:
         *
         * token.content = [
         *     <span class="punctuation">```</span>,
         *     <span class="code-language">xxxx</span>,
         *     '\n', // exactly one new line (\r or \n or \r\n)
         *     <span class="code-block">...</span>,
         *     '\n', // exactly one new line again
         *     <span class="punctuation">```</span>
         * ];
         */

        var codeLang = token.content[1];
        var codeBlock = token.content[3];

        if (codeLang && codeBlock &&
          codeLang.type === 'code-language' && codeBlock.type === 'code-block' &&
          typeof codeLang.content === 'string') {

          // this might be a language that Prism does not support

          // do some replacements to support C++, C#, and F#
          var lang = codeLang.content.replace(/\b#/g, 'sharp').replace(/\b\+\+/g, 'pp');
          // only use the first word
          lang = (/[a-z][\w-]*/i.exec(lang) || [''])[0].toLowerCase();
          var alias = 'language-' + lang;

          // add alias (the existing alias may be missing, a string, or an array)
          if (!codeBlock.alias) {
            codeBlock.alias = [alias];
          } else if (typeof codeBlock.alias === 'string') {
            codeBlock.alias = [codeBlock.alias, alias];
          } else {
            codeBlock.alias.push(alias);
          }
        }
      }
    }

    walkTokens(env.tokens);
  });
  // When a `code-block` token is wrapped, highlight its content with the
  // grammar named by its `language-xxxx` class. If that grammar is not loaded
  // and the autoloader plugin is present, the language is loaded and the
  // element is highlighted afterwards.
  Prism.hooks.add('wrap', function (env) {
    if (env.type !== 'code-block') {
      return;
    }

    // the language is taken from the first class of the form `language-xxxx`
    var codeLang = '';
    for (var i = 0, l = env.classes.length; i < l; i++) {
      var cls = env.classes[i];
      var match = /language-(.+)/.exec(cls);
      if (match) {
        codeLang = match[1];
        break;
      }
    }

    var grammar = Prism.languages[codeLang];

    if (!grammar) {
      if (codeLang && codeLang !== 'none' && Prism.plugins.autoloader) {
        // give the element an id so that it can be found again once the language has been loaded
        var id = 'md-' + new Date().valueOf() + '-' + Math.floor(Math.random() * 1e16);
        env.attributes['id'] = id;

        Prism.plugins.autoloader.loadLanguages(codeLang, function () {
          var ele = document.getElementById(id);
          if (ele) {
            ele.innerHTML = Prism.highlight(ele.textContent, Prism.languages[codeLang], codeLang);
          }
        });
      }
    } else {
      // `env.content` is HTML source code, so it is converted back to text before highlighting
      env.content = Prism.highlight(textContent(env.content), grammar, codeLang);
    }
  });
  // A global, case-insensitive copy of the markup grammar's tag pattern.
  // `textContent` uses it to remove all tags from HTML source code.
  var tagPattern = RegExp(Prism.languages.markup.tag.pattern.source, 'gi');
  /**
   * A list of known entity names.
   *
   * This will always be incomplete to save space. The current list is the one used by lodash's unescape function.
   *
   * @see {@link https://github.com/lodash/lodash/blob/2da024c3b4f9947a48517639de7560457cd4ec6c/unescape.js#L2}
   */
  var KNOWN_ENTITY_NAMES = {
    'amp': '&',
    'lt': '<',
    'gt': '>',
    'quot': '"',
  };

  // IE 11 doesn't support `String.fromCodePoint`
  var fromCodePoint = String.fromCodePoint || String.fromCharCode;

  /**
   * Returns the text content of a given HTML source code string.
   *
   * All tags are removed, and numeric character references and the entities in `KNOWN_ENTITY_NAMES` are decoded.
   * Anything that cannot be decoded (unknown names, malformed numbers, numbers that are not valid code points) is
   * left as is.
   *
   * @param {string} html
   * @returns {string}
   */
  function textContent(html) {
    // remove all tags
    var text = html.replace(tagPattern, '');

    // decode known entities
    text = text.replace(/&(\w{1,8}|#x?[\da-f]{1,8});/gi, function (m, code) {
      code = code.toLowerCase();

      if (code[0] === '#') {
        var isHex = code[1] === 'x';
        var digits = code.slice(isHex ? 2 : 1);

        // The pattern above also accepts the letters a-f (and thereby e.g. `1e3`) in decimal
        // references; those are not valid decimal numbers.
        if (!isHex && !/^\d+$/.test(digits)) {
          return m;
        }

        var value = parseInt(digits, isHex ? 16 : 10);

        // `String.fromCodePoint` throws a RangeError for values that are not valid code points
        if (!(value <= 0x10FFFF)) {
          return m;
        }

        return fromCodePoint(value);
      }

      // own-property check so that inherited properties are never mistaken for entities
      if (Object.prototype.hasOwnProperty.call(KNOWN_ENTITY_NAMES, code)) {
        return KNOWN_ENTITY_NAMES[code];
      }

      // unable to decode
      return m;
    });

    return text;
  }
  // `md` refers to the same grammar object as `markdown`
  Prism.languages.md = Prism.languages.markdown;
  378. }(Prism));