// resolve-flow-scalar.js
  1. 'use strict';
  2. var Scalar = require('../nodes/Scalar.js');
  3. var resolveEnd = require('./resolve-end.js');
  /**
   * Resolve a flow scalar token into its string value.
   *
   * Dispatches on `scalar.type` to the plain, single-quoted or double-quoted
   * parser, then hands the token's `end` to `resolveEnd` to obtain the
   * trailing comment and the final offset.
   *
   * @param scalar - Token providing `offset`, `type`, `source` and `end`.
   * @param strict - Passed through unchanged to `resolveEnd`.
   * @param onError - Error callback `(position, code, message)`. The
   *   per-style parsers report positions relative to the scalar start; these
   *   are shifted by `scalar.offset` before being forwarded.
   * @returns `{ value, type, comment, range }` where `range` is
   *   `[start, valueEnd, endOffset]`.
   */
  function resolveFlowScalar(scalar, strict, onError) {
    const { offset, type, source, end } = scalar;
    // Translate scalar-relative error positions into absolute ones.
    const reportRelative = (rel, code, msg) => onError(offset + rel, code, msg);
    let scalarType;
    let value;
    switch (type) {
      case 'scalar':
        scalarType = Scalar.Scalar.PLAIN;
        value = plainValue(source, reportRelative);
        break;
      case 'single-quoted-scalar':
        scalarType = Scalar.Scalar.QUOTE_SINGLE;
        value = singleQuotedValue(source, reportRelative);
        break;
      case 'double-quoted-scalar':
        scalarType = Scalar.Scalar.QUOTE_DOUBLE;
        value = doubleQuotedValue(source, reportRelative);
        break;
      /* istanbul ignore next should not happen */
      default: {
        // Unknown token type: report it against the token itself and return
        // an empty value spanning the raw source.
        onError(scalar, 'UNEXPECTED_TOKEN', `Expected a flow scalar value, but found: ${type}`);
        const fallbackEnd = offset + source.length;
        return {
          value: '',
          type: null,
          comment: '',
          range: [offset, fallbackEnd, fallbackEnd]
        };
      }
    }
    const valueEnd = offset + source.length;
    // resolveEnd receives the unshifted onError, unlike the parsers above.
    const { comment, offset: endOffset } = resolveEnd.resolveEnd(end, valueEnd, strict, onError);
    return {
      value,
      type: scalarType,
      comment,
      range: [offset, valueEnd, endOffset]
    };
  }
  /**
   * Resolve the value of a plain (unquoted) flow scalar.
   *
   * Reports `BAD_SCALAR_START` at relative position 0 when the first
   * character is one of the rejected leading characters (tab, `,`, `%`, `|`,
   * `>`, `@` or a backtick). The source is line-folded and returned either way.
   *
   * @param {string} source - Raw scalar source.
   * @param onError - Error callback `(relativeOffset, code, message)`.
   * @returns {string} The line-folded value.
   */
  function plainValue(source, onError) {
    const lead = source[0];
    let badChar = '';
    switch (lead) {
      /* istanbul ignore next should not happen */
      case '\t':
        badChar = 'a tab character';
        break;
      case ',':
        badChar = 'flow indicator character ,';
        break;
      case '%':
        badChar = 'directive indicator character %';
        break;
      case '|':
      case '>':
        badChar = `block scalar indicator ${lead}`;
        break;
      case '@':
      case '`':
        badChar = `reserved character ${lead}`;
        break;
    }
    if (badChar) {
      onError(0, 'BAD_SCALAR_START', `Plain value cannot start with ${badChar}`);
    }
    return foldLines(source);
  }
  /**
   * Resolve the value of a single-quoted flow scalar.
   *
   * Reports `MISSING_CHAR` at the end of the source when it consists only of
   * the opening quote or does not end with `'`. The first and last characters
   * are dropped, the remainder is line-folded, and every doubled quote (`''`)
   * is replaced by a single one.
   *
   * @param {string} source - Raw scalar source, including its quotes.
   * @param onError - Error callback `(relativeOffset, code, message)`.
   * @returns {string} The unquoted, folded value.
   */
  function singleQuotedValue(source, onError) {
    const isClosed = source.length !== 1 && source[source.length - 1] === "'";
    if (!isClosed) {
      onError(source.length, 'MISSING_CHAR', "Missing closing 'quote");
    }
    const inner = source.slice(1, -1);
    return foldLines(inner).replace(/''/g, "'");
  }
  /**
   * Fold the line breaks of a multi-line plain or single-quoted scalar.
   *
   * A single line break between two non-empty lines becomes one space. A run
   * of N empty lines between content becomes N newline characters. Spaces and
   * tabs before a line break and at the start of a continuation line are
   * dropped. A source without any line break is returned unchanged.
   *
   * @param {string} source - Scalar text to fold.
   * @returns {string} The folded text.
   */
  function foldLines(source) {
    // The negative lookbehind in both patterns prevents polynomial search
    // time in certain cases. Engines without lookbehind support throw when
    // the RegExp is constructed, so fall back to simpler patterns there
    // (https://caniuse.com/js-regexp-lookbehind).
    let firstLineRe;
    let nextLineRe;
    try {
      firstLineRe = new RegExp('(.*?)(?<![ \t])[ \t]*\r?\n', 'sy');
      nextLineRe = new RegExp('[ \t]*(.*?)(?:(?<![ \t])[ \t]*)?\r?\n', 'sy');
    }
    catch {
      firstLineRe = /(.*?)[ \t]*\r?\n/sy;
      nextLineRe = /[ \t]*(.*?)[ \t]*\r?\n/sy;
    }

    const head = firstLineRe.exec(source);
    if (!head) {
      return source;
    }

    let folded = head[1];
    let separator = ' ';
    // Remember the scan position ourselves: a failed sticky match resets
    // lastIndex to 0.
    let cursor = firstLineRe.lastIndex;
    nextLineRe.lastIndex = cursor;
    for (let m = nextLineRe.exec(source); m; m = nextLineRe.exec(source)) {
      const content = m[1];
      if (content !== '') {
        folded += separator + content;
        separator = ' ';
      }
      else if (separator === '\n') {
        // Second or later empty line in a row: emit a newline now.
        folded += separator;
      }
      else {
        // First empty line: the pending separator becomes a newline.
        separator = '\n';
      }
      cursor = nextLineRe.lastIndex;
    }

    // Whatever follows the last line break, minus its leading whitespace.
    const tailRe = /[ \t]*(.*)/sy;
    tailRe.lastIndex = cursor;
    const tail = tailRe.exec(source);
    return folded + separator + (tail?.[1] ?? '');
  }
  /**
   * Resolve the value of a double-quoted flow scalar.
   *
   * Scans the characters between the opening quote and the final character:
   * - unescaped line breaks are folded via `foldNewline`;
   * - backslash escapes are decoded via `escapeCodes` and `parseCharCode`;
   * - an escaped line break is dropped along with the spaces and tabs that
   *   start the following line;
   * - spaces and tabs directly before a line break are dropped.
   *
   * Unknown escapes are reported as `BAD_DQ_ESCAPE` and kept verbatim. A
   * missing closing quote is reported as `MISSING_CHAR`.
   *
   * @param {string} source - Raw scalar source, including its quotes.
   * @param onError - Error callback `(relativeOffset, code, message)`.
   * @returns {string} The decoded value.
   */
  function doubleQuotedValue(source, onError) {
    const isBlank = (c) => c === ' ' || c === '\t';
    let out = '';
    let pos = 1;
    while (pos < source.length - 1) {
      const ch = source[pos];
      if (ch === '\r' && source[pos + 1] === '\n') {
        // CR of a CRLF pair: the LF is handled on the next iteration.
      }
      else if (ch === '\n') {
        const { fold, offset } = foldNewline(source, pos);
        out += fold;
        pos = offset;
      }
      else if (ch === '\\') {
        pos += 1;
        const esc = source[pos];
        const mapped = escapeCodes[esc];
        if (mapped) {
          out += mapped;
        }
        else if (esc === '\n') {
          // skip escaped newlines, but still trim the following line
          while (isBlank(source[pos + 1])) {
            pos += 1;
          }
        }
        else if (esc === '\r' && source[pos + 1] === '\n') {
          // skip escaped CRLF newlines, but still trim the following line
          pos += 1;
          while (isBlank(source[pos + 1])) {
            pos += 1;
          }
        }
        else if (esc === 'x' || esc === 'u' || esc === 'U') {
          const length = { x: 2, u: 4, U: 8 }[esc];
          out += parseCharCode(source, pos + 1, length, onError);
          pos += length;
        }
        else {
          // Unknown escape: report it and keep the backslash pair verbatim.
          const raw = source.slice(pos - 1, pos + 1);
          onError(pos - 1, 'BAD_DQ_ESCAPE', `Invalid escape sequence ${raw}`);
          out += raw;
        }
      }
      else if (isBlank(ch)) {
        // trim trailing whitespace
        const wsStart = pos;
        while (isBlank(source[pos + 1])) {
          pos += 1;
        }
        const after = source[pos + 1];
        const beforeBreak = after === '\n' || (after === '\r' && source[pos + 2] === '\n');
        if (!beforeBreak) {
          out += source.slice(wsStart, pos + 1);
        }
      }
      else {
        out += ch;
      }
      pos += 1;
    }
    const isClosed = source.length !== 1 && source[source.length - 1] === '"';
    if (!isClosed) {
      onError(source.length, 'MISSING_CHAR', 'Missing closing "quote');
    }
    return out;
  }
  /**
   * Fold the line break at `source[offset]` together with any following
   * whitespace-only lines.
   *
   * Starting after `offset`, consumes spaces, tabs, `\n` and the `\r` of a
   * CRLF pair. Each additional `\n` consumed adds one newline to the result,
   * so a single break folds to a space and N breaks fold to N - 1 newlines.
   * Presumes `source[offset] === '\n'`.
   *
   * @param {string} source - Text being scanned.
   * @param {number} offset - Index of the line break to fold.
   * @returns {{ fold: string, offset: number }} The replacement text and the
   *   index of the last character consumed.
   */
  function foldNewline(source, offset) {
    let newlines = '';
    let pos = offset;
    for (;;) {
      const ch = source[pos + 1];
      if (ch === '\n') {
        newlines += '\n';
      }
      else if (ch === '\r') {
        // A CR counts only as part of a CRLF pair.
        if (source[pos + 2] !== '\n') {
          break;
        }
      }
      else if (ch !== ' ' && ch !== '\t') {
        break;
      }
      pos += 1;
    }
    return { fold: newlines || ' ', offset: pos };
  }
  /**
   * Single-character escapes recognised after a backslash in a double-quoted
   * scalar, mapped to the text they produce. The `\x`, `\u` and `\U` escapes
   * are not listed here because they take hex digits.
   */
  const escapeCodes = {
    // Control characters
    '0': '\0', // null character
    'a': '\x07', // bell character
    'b': '\b', // backspace
    'e': '\x1b', // escape character
    'f': '\f', // form feed
    'n': '\n', // line feed
    'r': '\r', // carriage return
    't': '\t', // horizontal tab
    'v': '\v', // vertical tab
    // Unicode breaks and spaces
    'N': '\u0085', // Unicode next line
    '_': '\u00a0', // Unicode non-breaking space
    'L': '\u2028', // Unicode line separator
    'P': '\u2029', // Unicode paragraph separator
    // Characters that escape to themselves
    ' ': ' ',
    '"': '"',
    '/': '/',
    '\\': '\\',
    '\t': '\t'
  };
  /**
   * Decode the hex digits of a `\x`, `\u` or `\U` escape into a character.
   *
   * The escape is invalid when fewer than `length` characters are available,
   * when any of them is not a hex digit, or when the value lies above
   * U+10FFFF. The last case used to reach `String.fromCodePoint()`, which
   * throws a RangeError for such values. Invalid escapes are reported as
   * `BAD_DQ_ESCAPE` and returned verbatim instead.
   *
   * @param {string} source - Text containing the escape.
   * @param {number} offset - Index of the first hex digit. The backslash and
   *   escape letter are taken to be the two preceding characters.
   * @param {number} length - Number of hex digits expected.
   * @param onError - Error callback `(relativeOffset, code, message)`, called
   *   with the index of the backslash.
   * @returns {string} The decoded character, or the raw escape text if invalid.
   */
  function parseCharCode(source, offset, length, onError) {
    const MAX_CODE_POINT = 0x10ffff;
    const digits = source.slice(offset, offset + length);
    const isHex = digits.length === length && /^[0-9a-fA-F]+$/.test(digits);
    const code = isHex ? Number.parseInt(digits, 16) : NaN;
    if (Number.isNaN(code) || code > MAX_CODE_POINT) {
      const raw = source.slice(offset - 2, offset + length);
      onError(offset - 2, 'BAD_DQ_ESCAPE', `Invalid escape sequence ${raw}`);
      return raw;
    }
    return String.fromCodePoint(code);
  }
  // Public API of this module: only the flow-scalar resolver is exported.
  exports.resolveFlowScalar = resolveFlowScalar;