prism-scheme.js 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. (function (Prism) {
  /**
   * Prism grammar for Scheme.
   *
   * Each key is a token name; its value is either a RegExp or an object with a `pattern` plus the `lookbehind` /
   * `greedy` flags. For entries with `lookbehind: true`, the first capturing group of the pattern is context that
   * is not meant to be part of the token itself.
   */
  Prism.languages.scheme = {
    // this supports "normal" single-line comments:
    //   ; comment
    // datum comments (`#;` followed by a parenthesized or bracketed form):
    //   #;(commented out form)
    // and (potentially nested) multiline comments:
    //   #| comment #| nested |# still comment |#
    // (only 1 level of nesting is supported)
    'comment': /;.*|#;\s*(?:\((?:[^()]|\([^()]*\))*\)|\[(?:[^\[\]]|\[[^\[\]]*\])*\])|#\|(?:[^#|]|#(?!\|)|\|(?!#)|#\|(?:[^#|]|#(?!\|)|\|(?!#))*\|#)*\|#/,
    'string': {
      pattern: /"(?:[^"\\]|\\.)*"/,
      greedy: true
    },
    'symbol': {
      pattern: /'[^()\[\]#'\s]+/,
      greedy: true
    },
    'char': {
      pattern: /#\\(?:[ux][a-fA-F\d]+\b|[-a-zA-Z]+\b|[\uD800-\uDBFF][\uDC00-\uDFFF]|\S)/,
      greedy: true
    },
    'lambda-parameter': [
      // https://www.cs.cmu.edu/Groups/AI/html/r4rs/r4rs_6.html#SEC30
      {
        // a single identifier directly after `lambda`: (lambda args ...)
        pattern: /((?:^|[^'`#])[(\[]lambda\s+)(?:[^|()\[\]'\s]+|\|(?:[^\\|]|\\.)*\|)/,
        lookbehind: true
      },
      {
        // the contents of a bracketed parameter list: (lambda (a b) ...)
        pattern: /((?:^|[^'`#])[(\[]lambda\s+[(\[])[^()\[\]']+/,
        lookbehind: true
      }
    ],
    'keyword': {
      pattern: /((?:^|[^'`#])[(\[])(?:begin|case(?:-lambda)?|cond(?:-expand)?|define(?:-library|-macro|-record-type|-syntax|-values)?|defmacro|delay(?:-force)?|do|else|except|export|guard|if|import|include(?:-ci|-library-declarations)?|lambda|let(?:rec)?(?:-syntax|-values|\*)?|let\*-values|only|parameterize|prefix|(?:quasi-?)?quote|rename|set!|syntax-(?:case|rules)|unless|unquote(?:-splicing)?|when)(?=[()\[\]\s]|$)/,
      lookbehind: true
    },
    'builtin': {
      // all functions of the base library of R7RS plus some of built-ins of R5Rs
      pattern: /((?:^|[^'`#])[(\[])(?:abs|and|append|apply|assoc|ass[qv]|binary-port\?|boolean=?\?|bytevector(?:-append|-copy|-copy!|-length|-u8-ref|-u8-set!|\?)?|caar|cadr|call-with-(?:current-continuation|port|values)|call\/cc|car|cdar|cddr|cdr|ceiling|char(?:->integer|-ready\?|\?|<\?|<=\?|=\?|>\?|>=\?)|close-(?:input-port|output-port|port)|complex\?|cons|current-(?:error|input|output)-port|denominator|dynamic-wind|eof-object\??|eq\?|equal\?|eqv\?|error|error-object(?:-irritants|-message|\?)|eval|even\?|exact(?:-integer-sqrt|-integer\?|\?)?|expt|features|file-error\?|floor(?:-quotient|-remainder|\/)?|flush-output-port|for-each|gcd|get-output-(?:bytevector|string)|inexact\??|input-port(?:-open\?|\?)|integer(?:->char|\?)|lcm|length|list(?:->string|->vector|-copy|-ref|-set!|-tail|\?)?|make-(?:bytevector|list|parameter|string|vector)|map|max|member|memq|memv|min|modulo|negative\?|newline|not|null\?|number(?:->string|\?)|numerator|odd\?|open-(?:input|output)-(?:bytevector|string)|or|output-port(?:-open\?|\?)|pair\?|peek-char|peek-u8|port\?|positive\?|procedure\?|quotient|raise|raise-continuable|rational\?|rationalize|read-(?:bytevector|bytevector!|char|error\?|line|string|u8)|real\?|remainder|reverse|round|set-c[ad]r!|square|string(?:->list|->number|->symbol|->utf8|->vector|-append|-copy|-copy!|-fill!|-for-each|-length|-map|-ref|-set!|\?|<\?|<=\?|=\?|>\?|>=\?)?|substring|symbol(?:->string|\?|=\?)|syntax-error|textual-port\?|truncate(?:-quotient|-remainder|\/)?|u8-ready\?|utf8->string|values|vector(?:->list|->string|-append|-copy|-copy!|-fill!|-for-each|-length|-map|-ref|-set!|\?)?|with-exception-handler|write-(?:bytevector|char|string|u8)|zero\?)(?=[()\[\]\s]|$)/,
      lookbehind: true
    },
    'operator': {
      pattern: /((?:^|[^'`#])[(\[])(?:[-+*%/]|[<>]=?|=>?)(?=[()\[\]\s]|$)/,
      lookbehind: true
    },
    'number': {
      // The number pattern from [the R7RS spec](https://small.r7rs.org/attachment/r7rs.pdf).
      //
      // <number>      := <num 2>|<num 8>|<num 10>|<num 16>
      // <num R>       := <prefix R><complex R>
      // <complex R>   := <real R>(?:@<real R>|<imaginary R>)?|<imaginary R>
      // <imaginary R> := [+-](?:<ureal R>|(?:inf|nan)\.0)?i
      // <real R>      := [+-]?<ureal R>|[+-](?:inf|nan)\.0
      // <ureal R>     := <uint R>(?:\/<uint R>)?
      //                | <decimal R>
      //
      // <decimal 10>  := (?:\d+(?:\.\d*)?|\.\d+)(?:e[+-]?\d+)?
      // <uint R>      := <digit R>+
      // <prefix R>    := <radix R>(?:#[ei])?|(?:#[ei])?<radix R>
      // <radix 2>     := #b
      // <radix 8>     := #o
      // <radix 10>    := (?:#d)?
      // <radix 16>    := #x
      // <digit 2>     := [01]
      // <digit 8>     := [0-7]
      // <digit 10>    := \d
      // <digit 16>    := [0-9a-f]
      //
      // The problem with this grammar is that the resulting regex is way too complex, so we simplify by grouping all
      // non-decimal bases together. This results in a decimal (dec) and combined binary, octal, and hexadecimal (box)
      // pattern:
      pattern: RegExp(SortedBNF({
        '<ureal dec>': /\d+(?:\/\d+)|(?:\d+(?:\.\d*)?|\.\d+)(?:[esfdl][+-]?\d+)?/.source,
        '<real dec>': /[+-]?<ureal dec>|[+-](?:inf|nan)\.0/.source,
        '<imaginary dec>': /[+-](?:<ureal dec>|(?:inf|nan)\.0)?i/.source,
        '<complex dec>': /<real dec>(?:@<real dec>|<imaginary dec>)?|<imaginary dec>/.source,
        '<num dec>': /(?:#d(?:#[ei])?|#[ei](?:#d)?)?<complex dec>/.source,
        '<ureal box>': /[0-9a-f]+(?:\/[0-9a-f]+)?/.source,
        '<real box>': /[+-]?<ureal box>|[+-](?:inf|nan)\.0/.source,
        '<imaginary box>': /[+-](?:<ureal box>|(?:inf|nan)\.0)?i/.source,
        '<complex box>': /<real box>(?:@<real box>|<imaginary box>)?|<imaginary box>/.source,
        // Both prefix orders (radix first or exactness first) must be grouped so that <complex box> is required
        // after either of them. Without the group, `|` would make a bare prefix like `#x` a complete number and
        // `#x#e1f` would not match at all.
        '<num box>': /(?:#[box](?:#[ei])?|(?:#[ei])?#[box])<complex box>/.source,
        '<number>': /(^|[()\[\]\s])(?:<num dec>|<num box>)(?=[()\[\]\s]|$)/.source,
      }), 'i'),
      lookbehind: true
    },
    'boolean': {
      pattern: /(^|[()\[\]\s])#(?:[ft]|false|true)(?=[()\[\]\s]|$)/,
      lookbehind: true
    },
    'function': {
      // any identifier (plain or |quoted|) in head position of a form
      pattern: /((?:^|[^'`#])[(\[])(?:[^|()\[\]'\s]+|\|(?:[^\\|]|\\.)*\|)(?=[()\[\]\s]|$)/,
      lookbehind: true
    },
    'identifier': {
      // |quoted identifiers| that stand on their own
      pattern: /(^|[()\[\]\s])\|(?:[^\\|]|\\.)*\|(?=[()\[\]\s]|$)/,
      lookbehind: true,
      greedy: true
    },
    'punctuation': /[()\[\]']/
  };
  /**
   * Given a topologically sorted BNF grammar, this will return the RegExp source of the last rule of the grammar.
   *
   * Rules are processed in the order of the grammar's own keys. In each rule, every `<rule name>` placeholder
   * (letters, digits, underscores and whitespace between angle brackets) is replaced by the trimmed source of the
   * referenced rule wrapped in a non-capturing group. Because earlier rules are expanded before later ones are
   * processed, a rule may only reference rules that come before it.
   *
   * The passed object is not modified and inherited properties are ignored.
   *
   * @param {Record<string, string>} grammar
   * @returns {string} The fully expanded source of the last rule (`undefined` if the grammar has no rules).
   * @throws {TypeError} If a rule references a name that is not a rule of the grammar.
   */
  function SortedBNF(grammar) {
    var hasOwn = Object.prototype.hasOwnProperty;
    var names = Object.keys(grammar);
    // Work on a prototype-less copy so the caller's object stays untouched and lookups only see real rules.
    var rules = Object.create(null);
    var i;

    for (i = 0; i < names.length; i++) {
      rules[names[i]] = grammar[names[i]];
    }

    function inline(reference) {
      if (!hasOwn.call(rules, reference)) {
        throw new TypeError('SortedBNF: unknown rule ' + reference);
      }
      return '(?:' + rules[reference].trim() + ')';
    }

    for (i = 0; i < names.length; i++) {
      rules[names[i]] = rules[names[i]].replace(/<[\w\s]+>/g, inline);
    }

    // return the last item
    return rules[names[names.length - 1]];
  }
  116. }(Prism));