resolve-flow-scalar.js 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. import { Scalar } from '../nodes/Scalar.js';
  2. import { resolveEnd } from './resolve-end.js';
  3. function resolveFlowScalar(scalar, strict, onError) {
  4. const { offset, type, source, end } = scalar;
  5. let _type;
  6. let value;
  7. const _onError = (rel, code, msg) => onError(offset + rel, code, msg);
  8. switch (type) {
  9. case 'scalar':
  10. _type = Scalar.PLAIN;
  11. value = plainValue(source, _onError);
  12. break;
  13. case 'single-quoted-scalar':
  14. _type = Scalar.QUOTE_SINGLE;
  15. value = singleQuotedValue(source, _onError);
  16. break;
  17. case 'double-quoted-scalar':
  18. _type = Scalar.QUOTE_DOUBLE;
  19. value = doubleQuotedValue(source, _onError);
  20. break;
  21. /* istanbul ignore next should not happen */
  22. default:
  23. onError(scalar, 'UNEXPECTED_TOKEN', `Expected a flow scalar value, but found: ${type}`);
  24. return {
  25. value: '',
  26. type: null,
  27. comment: '',
  28. range: [offset, offset + source.length, offset + source.length]
  29. };
  30. }
  31. const valueEnd = offset + source.length;
  32. const re = resolveEnd(end, valueEnd, strict, onError);
  33. return {
  34. value,
  35. type: _type,
  36. comment: re.comment,
  37. range: [offset, valueEnd, re.offset]
  38. };
  39. }
  40. function plainValue(source, onError) {
  41. let badChar = '';
  42. switch (source[0]) {
  43. /* istanbul ignore next should not happen */
  44. case '\t':
  45. badChar = 'a tab character';
  46. break;
  47. case ',':
  48. badChar = 'flow indicator character ,';
  49. break;
  50. case '%':
  51. badChar = 'directive indicator character %';
  52. break;
  53. case '|':
  54. case '>': {
  55. badChar = `block scalar indicator ${source[0]}`;
  56. break;
  57. }
  58. case '@':
  59. case '`': {
  60. badChar = `reserved character ${source[0]}`;
  61. break;
  62. }
  63. }
  64. if (badChar)
  65. onError(0, 'BAD_SCALAR_START', `Plain value cannot start with ${badChar}`);
  66. return foldLines(source);
  67. }
  68. function singleQuotedValue(source, onError) {
  69. if (source[source.length - 1] !== "'" || source.length === 1)
  70. onError(source.length, 'MISSING_CHAR', "Missing closing 'quote");
  71. return foldLines(source.slice(1, -1)).replace(/''/g, "'");
  72. }
  73. function foldLines(source) {
  74. /**
  75. * The negative lookbehind here and in the `re` RegExp is to
  76. * prevent causing a polynomial search time in certain cases.
  77. *
  78. * The try-catch is for Safari, which doesn't support this yet:
  79. * https://caniuse.com/js-regexp-lookbehind
  80. */
  81. let first, line;
  82. try {
  83. first = new RegExp('(.*?)(?<![ \t])[ \t]*\r?\n', 'sy');
  84. line = new RegExp('[ \t]*(.*?)(?:(?<![ \t])[ \t]*)?\r?\n', 'sy');
  85. }
  86. catch {
  87. first = /(.*?)[ \t]*\r?\n/sy;
  88. line = /[ \t]*(.*?)[ \t]*\r?\n/sy;
  89. }
  90. let match = first.exec(source);
  91. if (!match)
  92. return source;
  93. let res = match[1];
  94. let sep = ' ';
  95. let pos = first.lastIndex;
  96. line.lastIndex = pos;
  97. while ((match = line.exec(source))) {
  98. if (match[1] === '') {
  99. if (sep === '\n')
  100. res += sep;
  101. else
  102. sep = '\n';
  103. }
  104. else {
  105. res += sep + match[1];
  106. sep = ' ';
  107. }
  108. pos = line.lastIndex;
  109. }
  110. const last = /[ \t]*(.*)/sy;
  111. last.lastIndex = pos;
  112. match = last.exec(source);
  113. return res + sep + (match?.[1] ?? '');
  114. }
  115. function doubleQuotedValue(source, onError) {
  116. let res = '';
  117. for (let i = 1; i < source.length - 1; ++i) {
  118. const ch = source[i];
  119. if (ch === '\r' && source[i + 1] === '\n')
  120. continue;
  121. if (ch === '\n') {
  122. const { fold, offset } = foldNewline(source, i);
  123. res += fold;
  124. i = offset;
  125. }
  126. else if (ch === '\\') {
  127. let next = source[++i];
  128. const cc = escapeCodes[next];
  129. if (cc)
  130. res += cc;
  131. else if (next === '\n') {
  132. // skip escaped newlines, but still trim the following line
  133. next = source[i + 1];
  134. while (next === ' ' || next === '\t')
  135. next = source[++i + 1];
  136. }
  137. else if (next === '\r' && source[i + 1] === '\n') {
  138. // skip escaped CRLF newlines, but still trim the following line
  139. next = source[++i + 1];
  140. while (next === ' ' || next === '\t')
  141. next = source[++i + 1];
  142. }
  143. else if (next === 'x' || next === 'u' || next === 'U') {
  144. const length = { x: 2, u: 4, U: 8 }[next];
  145. res += parseCharCode(source, i + 1, length, onError);
  146. i += length;
  147. }
  148. else {
  149. const raw = source.substr(i - 1, 2);
  150. onError(i - 1, 'BAD_DQ_ESCAPE', `Invalid escape sequence ${raw}`);
  151. res += raw;
  152. }
  153. }
  154. else if (ch === ' ' || ch === '\t') {
  155. // trim trailing whitespace
  156. const wsStart = i;
  157. let next = source[i + 1];
  158. while (next === ' ' || next === '\t')
  159. next = source[++i + 1];
  160. if (next !== '\n' && !(next === '\r' && source[i + 2] === '\n'))
  161. res += i > wsStart ? source.slice(wsStart, i + 1) : ch;
  162. }
  163. else {
  164. res += ch;
  165. }
  166. }
  167. if (source[source.length - 1] !== '"' || source.length === 1)
  168. onError(source.length, 'MISSING_CHAR', 'Missing closing "quote');
  169. return res;
  170. }
  171. /**
  172. * Fold a single newline into a space, multiple newlines to N - 1 newlines.
  173. * Presumes `source[offset] === '\n'`
  174. */
  175. function foldNewline(source, offset) {
  176. let fold = '';
  177. let ch = source[offset + 1];
  178. while (ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r') {
  179. if (ch === '\r' && source[offset + 2] !== '\n')
  180. break;
  181. if (ch === '\n')
  182. fold += '\n';
  183. offset += 1;
  184. ch = source[offset + 1];
  185. }
  186. if (!fold)
  187. fold = ' ';
  188. return { fold, offset };
  189. }
  190. const escapeCodes = {
  191. '0': '\0', // null character
  192. a: '\x07', // bell character
  193. b: '\b', // backspace
  194. e: '\x1b', // escape character
  195. f: '\f', // form feed
  196. n: '\n', // line feed
  197. r: '\r', // carriage return
  198. t: '\t', // horizontal tab
  199. v: '\v', // vertical tab
  200. N: '\u0085', // Unicode next line
  201. _: '\u00a0', // Unicode non-breaking space
  202. L: '\u2028', // Unicode line separator
  203. P: '\u2029', // Unicode paragraph separator
  204. ' ': ' ',
  205. '"': '"',
  206. '/': '/',
  207. '\\': '\\',
  208. '\t': '\t'
  209. };
  210. function parseCharCode(source, offset, length, onError) {
  211. const cc = source.substr(offset, length);
  212. const ok = cc.length === length && /^[0-9a-fA-F]+$/.test(cc);
  213. const code = ok ? parseInt(cc, 16) : NaN;
  214. if (isNaN(code)) {
  215. const raw = source.substr(offset - 2, length + 2);
  216. onError(offset - 2, 'BAD_DQ_ESCAPE', `Invalid escape sequence ${raw}`);
  217. return raw;
  218. }
  219. return String.fromCodePoint(code);
  220. }
  221. export { resolveFlowScalar };