// prism-factor.js — Prism syntax-highlighting grammar for the Factor language
(function (Prism) {
  // Marker words (BUG, FIXME, NOTE, TODO, XXX, HACK, WARNING, ??, !!) that are
  // highlighted when they occur inside a comment token.
  var comment_inside = {
    'function': /\b(?:BUGS?|FIX(?:MES?)?|NOTES?|TODOS?|XX+|HACKS?|WARN(?:ING)?|\?{2,}|!{2,})\b/
  };

  // Backslash escapes (\n, \\ ...) and %-directives (%d, %s ...) that are
  // highlighted when they occur inside a string token.
  var string_inside = {
    'number': /\\[^\s']|%\w/
  };

  // The Factor grammar. Key order is significant: Prism tries the tokens in
  // declaration order, so more specific tokens must come before 'normal-word'.
  // Almost every pattern uses a `(^|\s)` lookbehind group and a `(?=\s|$)`
  // lookahead because Factor words are delimited by whitespace only.
  var factor = {
    'comment': [
      {
        // ! single-line exclamation point comments with whitespace after/around the !
        pattern: /(^|\s)(?:! .*|!$)/,
        lookbehind: true,
        inside: comment_inside
      },

      /* from basis/multiline: */
      {
        // /* comment */, /* comment*/
        pattern: /(^|\s)\/\*\s[\s\S]*?\*\/(?=\s|$)/,
        lookbehind: true,
        greedy: true,
        inside: comment_inside
      },
      {
        // ![[ comment ]] , ![===[ comment]===]
        // \2 forces the closing delimiter to carry as many `=` as the opening one
        pattern: /(^|\s)!\[(={0,6})\[\s[\s\S]*?\]\2\](?=\s|$)/,
        lookbehind: true,
        greedy: true,
        inside: comment_inside
      }
    ],

    'number': [
      {
        // basic base 10 integers 9, -9
        pattern: /(^|\s)[+-]?\d+(?=\s|$)/,
        lookbehind: true
      },
      {
        // base prefix integers 0b010 0o70 0xad 0d10 0XAD -0xa9
        pattern: /(^|\s)[+-]?0(?:b[01]+|o[0-7]+|d\d+|x[\dA-F]+)(?=\s|$)/i,
        lookbehind: true
      },
      {
        // fractional ratios 1/5 -1/5 and the literal float approximations 1/5. -1/5.
        pattern: /(^|\s)[+-]?\d+\/\d+\.?(?=\s|$)/,
        lookbehind: true
      },
      {
        // positive mixed numbers 23+1/5 +23+1/5
        pattern: /(^|\s)\+?\d+\+\d+\/\d+(?=\s|$)/,
        lookbehind: true
      },
      {
        // negative mixed numbers -23-1/5
        pattern: /(^|\s)-\d+-\d+\/\d+(?=\s|$)/,
        lookbehind: true
      },
      {
        // basic decimal floats -0.01 0. .0 .1 -.1 -1. -12.13 +12.13
        // and scientific notation with base 10 exponents 3e4 3e-4 .3e-4
        pattern: /(^|\s)[+-]?(?:\d*\.\d+|\d+\.\d*|\d+)(?:e[+-]?\d+)?(?=\s|$)/i,
        lookbehind: true
      },
      {
        // NAN literal syntax NAN: 80000deadbeef, NAN: a
        pattern: /(^|\s)NAN:\s+[\da-fA-F]+(?=\s|$)/,
        lookbehind: true
      },
      {
        /*
          base prefix floats 0x1.0p3 (8.0) 0b1.010p2 (5.0) 0x1.p1 0b1.11111111p11111...
          "The normalized hex form ±0x1.MMMMMMMMMMMMM[pP]±EEEE allows any floating-point number to be specified precisely.
          The values of MMMMMMMMMMMMM and EEEE map directly to the mantissa and exponent fields of the binary IEEE 754 representation."
          <https://docs.factorcode.org/content/article-syntax-floats.html>

          The exponent carries an optional sign (±EEEE above), so 0x1.0p-3 and
          0x1.0p+3 are accepted as well as 0x1.0p3.
        */
        pattern: /(^|\s)[+-]?0(?:b1\.[01]*|o1\.[0-7]*|d1\.\d*|x1\.[\dA-F]*)p[+-]?\d+(?=\s|$)/i,
        lookbehind: true
      }
    ],

    // R/ regexp?\/\\/
    'regexp': {
      pattern: /(^|\s)R\/\s(?:\\\S|[^\\/])*\/(?:[idmsr]*|[idmsr]+-[idmsr]+)(?=\s|$)/,
      lookbehind: true,
      alias: 'number',
      inside: {
        'variable': /\\\S/,
        'keyword': /[+?*\[\]^$(){}.|]/,
        'operator': {
          // option letters that follow the closing slash
          pattern: /(\/)[idmsr]+(?:-[idmsr]+)?/,
          lookbehind: true
        }
      }
    },

    'boolean': {
      pattern: /(^|\s)[tf](?=\s|$)/,
      lookbehind: true
    },

    // SBUF" asd", URL" ://...", P" /etc/"
    'custom-string': {
      pattern: /(^|\s)[A-Z0-9\-]+"\s(?:\\\S|[^"\\])*"/,
      lookbehind: true,
      greedy: true,
      alias: 'string',
      inside: {
        'number': /\\\S|%\w|\//
      }
    },

    'multiline-string': [
      {
        // STRING: name \n content \n ; -> CONSTANT: name "content" (symbol)
        // The content may span any number of lines; it ends at the first line
        // that holds only a `;`.
        pattern: /(^|\s)STRING:\s+\S+(?:\n|\r\n)[\s\S]*?(?:\n|\r\n)\s*;(?=\s|$)/,
        lookbehind: true,
        greedy: true,
        alias: 'string',
        inside: {
          'number': string_inside.number,
          // trailing semicolon on its own line
          'semicolon-or-setlocal': {
            pattern: /([\r\n][ \t]*);(?=\s|$)/,
            lookbehind: true,
            alias: 'function'
          }
        }
      },
      {
        // HEREDOC: marker \n content \n marker ; -> "content" (immediate)
        // The content may span any number of lines; \2 requires the closing
        // word to be the same marker that followed HEREDOC:.
        pattern: /(^|\s)HEREDOC:\s+(\S+)(?:\n|\r\n)[\s\S]*?(?:\n|\r\n)\s*\2(?=\s|$)/,
        lookbehind: true,
        greedy: true,
        alias: 'string',
        inside: string_inside
      },
      {
        // [[ string ]], [==[ string]==]
        pattern: /(^|\s)\[(={0,6})\[\s[\s\S]*?\]\2\](?=\s|$)/,
        lookbehind: true,
        greedy: true,
        alias: 'string',
        inside: string_inside
      }
    ],

    'special-using': {
      pattern: /(^|\s)USING:(?:\s\S+)*(?=\s+;(?:\s|$))/,
      lookbehind: true,
      alias: 'function',
      inside: {
        // this is essentially a regex for vocab names, which i don't want to specify
        // but the USING: gets picked up as a vocab name
        'string': {
          pattern: /(\s)[^:\s]+/,
          lookbehind: true
        }
      }
    },

    /* this description of stack effect literal syntax is not complete and not as specific as theoretically possible
      trying to do better is more work and regex-computation-time than it's worth though.
      - we'd like to have the "delimiter" parts of the stack effect [ (, --, and ) ] be a different (less-important or comment-like) colour to the stack effect contents
      - we'd like if nested stack effects were treated as such rather than just appearing flat (with `inside`)
      - we'd like if the following variable name conventions were recognised specifically:
        special row variables = ..a b..
        type and stack effect annotations end with a colon = ( quot: ( a: ( -- ) -- b ) -- x ), ( x: number -- )
        word throws unconditional error = *
        any other word-like variable name = a ? q' etc

      https://docs.factorcode.org/content/article-effects.html

      these are pretty complicated to highlight properly without a real parser, and therefore out of scope
      the old pattern, which may be later useful, was: (^|\s)(?:call|execute|eval)?\((?:\s+[^"\r\n\t ]\S*)*?\s+--(?:\s+[^"\n\t ]\S*)*?\s+\)(?=\s|$)
    */

    // current solution is not great
    'stack-effect-delimiter': [
      {
        // opening parenthesis
        pattern: /(^|\s)(?:call|eval|execute)?\((?=\s)/,
        lookbehind: true,
        alias: 'operator'
      },
      {
        // middle --
        pattern: /(\s)--(?=\s)/,
        lookbehind: true,
        alias: 'operator'
      },
      {
        // closing parenthesis
        pattern: /(\s)\)(?=\s|$)/,
        lookbehind: true,
        alias: 'operator'
      }
    ],

    // The tokens below that start with `pattern: null` are placeholders: their
    // patterns are generated from word lists further down in this file.
    'combinators': {
      pattern: null,
      lookbehind: true,
      alias: 'keyword'
    },

    'kernel-builtin': {
      pattern: null,
      lookbehind: true,
      alias: 'variable'
    },

    'sequences-builtin': {
      pattern: null,
      lookbehind: true,
      alias: 'variable'
    },

    'math-builtin': {
      pattern: null,
      lookbehind: true,
      alias: 'variable'
    },

    'constructor-word': {
      // <array> but not <=>
      pattern: /(^|\s)<(?!=+>|-+>)\S+>(?=\s|$)/,
      lookbehind: true,
      alias: 'keyword'
    },

    'other-builtin-syntax': {
      pattern: null,
      lookbehind: true,
      alias: 'operator'
    },

    /*
      full list of supported word naming conventions: (the convention appears outside of the [brackets])
        set-[x]
        change-[x]
        with-[x]
        new-[x]
        >[string]
        [base]>
        [string]>[number]
        +[symbol]+
        [boolean-word]?
        ?[of]
        [slot-reader]>>
        >>[slot-setter]
        [slot-writer]<<
        ([implementation-detail])
        [mutater]!
        [variant]*
        [prettyprint].
        $[help-markup]

      <constructors>, SYNTAX:, etc are supported by their own patterns.

      `with` and `new` from `kernel` are their own builtins.

      see <https://docs.factorcode.org/content/article-conventions.html>
    */
    'conventionally-named-word': {
      pattern: /(^|\s)(?!")(?:(?:change|new|set|with)-\S+|\$\S+|>[^>\s]+|[^:>\s]+>|[^>\s]+>[^>\s]+|\+[^+\s]+\+|[^?\s]+\?|\?[^?\s]+|[^>\s]+>>|>>[^>\s]+|[^<\s]+<<|\([^()\s]+\)|[^!\s]+!|[^*\s]\S*\*|[^.\s]\S*\.)(?=\s|$)/,
      lookbehind: true,
      alias: 'keyword'
    },

    'colon-syntax': {
      pattern: /(^|\s)(?:[A-Z0-9\-]+#?)?:{1,2}\s+(?:;\S+|(?!;)\S+)(?=\s|$)/,
      lookbehind: true,
      greedy: true,
      alias: 'function'
    },

    'semicolon-or-setlocal': {
      pattern: /(\s)(?:;|:>)(?=\s|$)/,
      lookbehind: true,
      alias: 'function'
    },

    // do not highlight leading } or trailing X{ at the begin/end of the file as it's invalid syntax
    'curly-brace-literal-delimiter': [
      {
        // opening
        pattern: /(^|\s)[a-z]*\{(?=\s)/i,
        lookbehind: true,
        alias: 'operator'
      },
      {
        // closing
        pattern: /(\s)\}(?=\s|$)/,
        lookbehind: true,
        alias: 'operator'
      }
    ],

    // do not highlight leading ] or trailing [ at the begin/end of the file as it's invalid syntax
    'quotation-delimiter': [
      {
        // opening
        pattern: /(^|\s)\[(?=\s)/,
        lookbehind: true,
        alias: 'operator'
      },
      {
        // closing
        pattern: /(\s)\](?=\s|$)/,
        lookbehind: true,
        alias: 'operator'
      }
    ],

    // fallback: any other whitespace-delimited word that does not start with "
    'normal-word': {
      pattern: /(^|\s)[^"\s]\S*(?=\s|$)/,
      lookbehind: true
    },

    /*
      basic first-class string "a"
        with escaped double-quote "a\""
        escaped backslash "\\"
        and general escapes since Factor has so many "\N"

      syntax that works in the reference implementation that isn't fully
      supported because it's an implementation detail:
        "string 1""string 2" -> 2 strings (works anyway)
        "string"5 -> string, 5
        "string"[ ] -> string, quotation
        { "a"} -> array<string>

      the rest of those examples all properly recognise the string, but not
        the other object (number, quotation, etc)
      this is fine for a regex-only implementation.
    */
    'string': {
      pattern: /"(?:\\\S|[^"\\])*"/,
      greedy: true,
      inside: string_inside
    }
  };
  /**
   * Escapes every regular-expression metacharacter in a value so that the
   * result can be embedded in a RegExp source and matched literally.
   *
   * The escaped characters are . ? * + ^ $ [ ] \ ( ) { } | and -.
   *
   * @param {*} str Value to escape; it is coerced to a string first.
   * @returns {string} The string with each metacharacter prefixed by a backslash.
   */
  var escape = function (str) {
    // `$&` re-inserts the matched character, so no capture group is needed
    return (str + '').replace(/[.?*+^$[\]\\(){}|-]/g, '\\$&');
  };
  /**
   * Builds a RegExp that matches any one of the given words as a complete
   * whitespace-delimited token.
   *
   * Each word is passed through `escape` and the results are joined into one
   * alternation. Group 1 captures the start of input or the whitespace before
   * the word (the grammar entries that receive this pattern set
   * `lookbehind: true`), and a lookahead requires whitespace or end of input
   * after it.
   *
   * @param {string[]} arr Words to match literally.
   * @returns {RegExp} Pattern of the form (^|\s)(?:w1|w2|...)(?=\s|$).
   */
  var arrToWordsRegExp = function (arr) {
    var alternation = arr.map(escape).join('|');
    return new RegExp('(^|\\s)(?:' + alternation + ')(?=\\s|$)');
  };
  // Word lists for the grammar tokens whose pattern starts out as null.
  // Each key is the name of a token in `factor`; each value lists the words
  // that should be highlighted as that token.
  var builtins = {
    'kernel-builtin': [
      'or', '2nipd', '4drop', 'tuck', 'wrapper', 'nip', 'wrapper?', 'callstack>array', 'die', 'dupd', 'callstack', 'callstack?', '3dup', 'hashcode', 'pick', '4nip', 'build', '>boolean', 'nipd', 'clone', '5nip', 'eq?', '?', '=', 'swapd', '2over', 'clear', '2dup', 'get-retainstack', 'not', 'tuple?', 'dup', '3nipd', 'call', '-rotd', 'object', 'drop', 'assert=', 'assert?', '-rot', 'execute', 'boa', 'get-callstack', 'curried?', '3drop', 'pickd', 'overd', 'over', 'roll', '3nip', 'swap', 'and', '2nip', 'rotd', 'throw', '(clone)', 'hashcode*', 'spin', 'reach', '4dup', 'equal?', 'get-datastack', 'assert', '2drop', '<wrapper>', 'boolean?', 'identity-hashcode', 'identity-tuple?', 'null', 'composed?', 'new', '5drop', 'rot', '-roll', 'xor', 'identity-tuple', 'boolean'
    ],
    'other-builtin-syntax': [
      // syntax
      '=======', 'recursive', 'flushable', '>>', '<<<<<<', 'M\\', 'B', 'PRIVATE>', '\\', '======', 'final', 'inline', 'delimiter', 'deprecated', '<PRIVATE', '>>>>>>', '<<<<<<<', 'parse-complex', 'malformed-complex', 'read-only', '>>>>>>>', 'call-next-method', '<<', 'foldable',
      // literals
      '$', '$[', '${'
    ],
    'sequences-builtin': [
      'member-eq?', 'mismatch', 'append', 'assert-sequence=', 'longer', 'repetition', 'clone-like', '3sequence', 'assert-sequence?', 'last-index-from', 'reversed', 'index-from', 'cut*', 'pad-tail', 'join-as', 'remove-eq!', 'concat-as', 'but-last', 'snip', 'nths', 'nth', 'sequence', 'longest', 'slice?', '<slice>', 'remove-nth', 'tail-slice', 'empty?', 'tail*', 'member?', 'virtual-sequence?', 'set-length', 'drop-prefix', 'iota', 'unclip', 'bounds-error?', 'unclip-last-slice', 'non-negative-integer-expected', 'non-negative-integer-expected?', 'midpoint@', 'longer?', '?set-nth', '?first', 'rest-slice', 'prepend-as', 'prepend', 'fourth', 'sift', 'subseq-start', 'new-sequence', '?last', 'like', 'first4', '1sequence', 'reverse', 'slice', 'virtual@', 'repetition?', 'set-last', 'index', '4sequence', 'max-length', 'set-second', 'immutable-sequence', 'first2', 'first3', 'supremum', 'unclip-slice', 'suffix!', 'insert-nth', 'tail', '3append', 'short', 'suffix', 'concat', 'flip', 'immutable?', 'reverse!', '2sequence', 'sum', 'delete-all', 'indices', 'snip-slice', '<iota>', 'check-slice', 'sequence?', 'head', 'append-as', 'halves', 'sequence=', 'collapse-slice', '?second', 'slice-error?', 'product', 'bounds-check?', 'bounds-check', 'immutable', 'virtual-exemplar', 'harvest', 'remove', 'pad-head', 'last', 'set-fourth', 'cartesian-product', 'remove-eq', 'shorten', 'shorter', 'reversed?', 'shorter?', 'shortest', 'head-slice', 'pop*', 'tail-slice*', 'but-last-slice', 'iota?', 'append!', 'cut-slice', 'new-resizable', 'head-slice*', 'sequence-hashcode', 'pop', 'set-nth', '?nth', 'second', 'join', 'immutable-sequence?', '<reversed>', '3append-as', 'virtual-sequence', 'subseq?', 'remove-nth!', 'length', 'last-index', 'lengthen', 'assert-sequence', 'copy', 'move', 'third', 'first', 'tail?', 'set-first', 'prefix', 'bounds-error', '<repetition>', 'exchange', 'surround', 'cut', 'min-length', 'set-third', 'push-all', 'head?', 'subseq-start-from', 'delete-slice', 'rest', 'sum-lengths',
      'head*', 'infimum', 'remove!', 'glue', 'slice-error', 'subseq', 'push', 'replace-slice', 'subseq-as', 'unclip-last'
    ],
    'math-builtin': [
      'number=', 'next-power-of-2', '?1+', 'fp-special?', 'imaginary-part', 'float>bits', 'number?', 'fp-infinity?', 'bignum?', 'fp-snan?', 'denominator', 'gcd', '*', '+', 'fp-bitwise=', '-', 'u>=', '/', '>=', 'bitand', 'power-of-2?', 'log2-expects-positive', 'neg?', '<', 'log2', '>', 'integer?', 'number', 'bits>double', '2/', 'zero?', 'bits>float', 'float?', 'shift', 'ratio?', 'rect>', 'even?', 'ratio', 'fp-sign', 'bitnot', '>fixnum', 'complex?', '/i', 'integer>fixnum', '/f', 'sgn', '>bignum', 'next-float', 'u<', 'u>', 'mod', 'recip', 'rational', '>float', '2^', 'integer', 'fixnum?', 'neg', 'fixnum', 'sq', 'bignum', '>rect', 'bit?', 'fp-qnan?', 'simple-gcd', 'complex', '<fp-nan>', 'real', '>fraction', 'double>bits', 'bitor', 'rem', 'fp-nan-payload', 'real-part', 'log2-expects-positive?', 'prev-float', 'align', 'unordered?', 'float', 'fp-nan?', 'abs', 'bitxor', 'integer>fixnum-strict', 'u<=', 'odd?', '<=', '/mod', '>integer', 'real?', 'rational?', 'numerator'
    ]
    // that's all for now
  };
  // Replace the null placeholder pattern of each builtin token with a RegExp
  // generated from its word list. The keys of `builtins` are token names in
  // `factor`.
  Object.keys(builtins).forEach(function (tokenName) {
    factor[tokenName].pattern = arrToWordsRegExp(builtins[tokenName]);
  });
  // Words highlighted as the 'combinators' token, grouped by the vocabulary
  // named in the comment above each group.
  var combinators = [
    // kernel
    '2bi', 'while', '2tri', 'bi*', '4dip', 'both?', 'same?', 'tri@', 'curry', 'prepose', '3bi', '?if', 'tri*', '2keep', '3keep', 'curried', '2keepd', 'when', '2bi*', '2tri*', '4keep', 'bi@', 'keepdd', 'do', 'unless*', 'tri-curry', 'if*', 'loop', 'bi-curry*', 'when*', '2bi@', '2tri@', 'with', '2with', 'either?', 'bi', 'until', '3dip', '3curry', 'tri-curry*', 'tri-curry@', 'bi-curry', 'keepd', 'compose', '2dip', 'if', '3tri', 'unless', 'tuple', 'keep', '2curry', 'tri', 'most', 'while*', 'dip', 'composed', 'bi-curry@',
    // sequences
    'find-last-from', 'trim-head-slice', 'map-as', 'each-from', 'none?', 'trim-tail', 'partition', 'if-empty', 'accumulate*', 'reject!', 'find-from', 'accumulate-as', 'collector-for-as', 'reject', 'map', 'map-sum', 'accumulate!', '2each-from', 'follow', 'supremum-by', 'map!', 'unless-empty', 'collector', 'padding', 'reduce-index', 'replicate-as', 'infimum-by', 'trim-tail-slice', 'count', 'find-index', 'filter', 'accumulate*!', 'reject-as', 'map-integers', 'map-find', 'reduce', 'selector', 'interleave', '2map', 'filter-as', 'binary-reduce', 'map-index-as', 'find', 'produce', 'filter!', 'replicate', 'cartesian-map', 'cartesian-each', 'find-index-from', 'map-find-last', '3map-as', '3map', 'find-last', 'selector-as', '2map-as', '2map-reduce', 'accumulate', 'each', 'each-index', 'accumulate*-as', 'when-empty', 'all?', 'collector-as', 'push-either', 'new-like', 'collector-for', '2selector', 'push-if', '2all?', 'map-reduce', '3each', 'any?', 'trim-slice', '2reduce', 'change-nth', 'produce-as', '2each', 'trim', 'trim-head', 'cartesian-find', 'map-index',
    // math
    'if-zero', 'each-integer', 'unless-zero', '(find-integer)', 'when-zero', 'find-last-integer', '(all-integers?)', 'times', '(each-integer)', 'find-integer', 'all-integers?',
    // math.combinators
    'unless-negative', 'if-positive', 'when-positive', 'when-negative', 'unless-positive', 'if-negative',
    // combinators
    'case', '2cleave', 'cond>quot', 'case>quot', '3cleave', 'wrong-values', 'to-fixed-point', 'alist>quot', 'cond', 'cleave', 'call-effect', 'recursive-hashcode', 'spread', 'deep-spread>quot',
    // combinators.short-circuit
    '2||', '0||', 'n||', '0&&', '2&&', '3||', '1||', '1&&', 'n&&', '3&&',
    // combinators.smart
    'smart-unless*', 'keep-inputs', 'reduce-outputs', 'smart-when*', 'cleave>array', 'smart-with', 'smart-apply', 'smart-if', 'inputs/outputs', 'output>sequence-n', 'map-outputs', 'map-reduce-outputs', 'dropping', 'output>array', 'smart-map-reduce', 'smart-2map-reduce', 'output>array-n', 'nullary', 'input<sequence', 'append-outputs', 'drop-inputs', 'inputs', 'smart-2reduce', 'drop-outputs', 'smart-reduce', 'preserving', 'smart-when', 'outputs', 'append-outputs-as', 'smart-unless', 'smart-if*', 'sum-outputs', 'input<sequence-unsafe', 'output>sequence'
    // tafn
  ];
  // Generate the 'combinators' token pattern from its word list, then publish
  // the finished grammar on the Prism instance under the language id 'factor'.
  factor.combinators.pattern = arrToWordsRegExp(combinators);

  Prism.languages.factor = factor;
}(Prism));