123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717 |
- import { BOM, DOCUMENT, FLOW_END, SCALAR } from './cst.js';
- /*
- START -> stream
- stream
- directive -> line-end -> stream
- indent + line-end -> stream
- [else] -> line-start
- line-end
- comment -> line-end
- newline -> .
- input-end -> END
- line-start
- doc-start -> doc
- doc-end -> stream
- [else] -> indent -> block-start
- block-start
- seq-item-start -> block-start
- explicit-key-start -> block-start
- map-value-start -> block-start
- [else] -> doc
- doc
- line-end -> line-start
- spaces -> doc
- anchor -> doc
- tag -> doc
- flow-start -> flow -> doc
- flow-end -> error -> doc
- seq-item-start -> error -> doc
- explicit-key-start -> error -> doc
- map-value-start -> doc
- alias -> doc
- quote-start -> quoted-scalar -> doc
- block-scalar-header -> line-end -> block-scalar(min) -> line-start
- [else] -> plain-scalar(false, min) -> doc
- flow
- line-end -> flow
- spaces -> flow
- anchor -> flow
- tag -> flow
- flow-start -> flow -> flow
- flow-end -> .
- seq-item-start -> error -> flow
- explicit-key-start -> flow
- map-value-start -> flow
- alias -> flow
- quote-start -> quoted-scalar -> flow
- comma -> flow
- [else] -> plain-scalar(true, 0) -> flow
- quoted-scalar
- quote-end -> .
- [else] -> quoted-scalar
- block-scalar(min)
- newline + peek(indent < min) -> .
- [else] -> block-scalar(min)
- plain-scalar(is-flow, min)
- scalar-end(is-flow) -> .
- peek(newline + (indent < min)) -> .
- [else] -> plain-scalar(min)
- */
- function isEmpty(ch) {
- switch (ch) {
- case undefined:
- case ' ':
- case '\n':
- case '\r':
- case '\t':
- return true;
- default:
- return false;
- }
- }
- const hexDigits = new Set('0123456789ABCDEFabcdef');
- const tagChars = new Set("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-#;/?:@&=+$_.!~*'()");
- const flowIndicatorChars = new Set(',[]{}');
- const invalidAnchorChars = new Set(' ,[]{}\n\r\t');
- const isNotAnchorChar = (ch) => !ch || invalidAnchorChars.has(ch);
- /**
- * Splits an input string into lexical tokens, i.e. smaller strings that are
- * easily identifiable by `tokens.tokenType()`.
- *
- * Lexing starts always in a "stream" context. Incomplete input may be buffered
- * until a complete token can be emitted.
- *
- * In addition to slices of the original input, the following control characters
- * may also be emitted:
- *
- * - `\x02` (Start of Text): A document starts with the next token
- * - `\x18` (Cancel): Unexpected end of flow-mode (indicates an error)
- * - `\x1f` (Unit Separator): Next token is a scalar value
- * - `\u{FEFF}` (Byte order mark): Emitted separately outside documents
- */
- class Lexer {
- constructor() {
- /**
- * Flag indicating whether the end of the current buffer marks the end of
- * all input
- */
- this.atEnd = false;
- /**
- * Explicit indent set in block scalar header, as an offset from the current
- * minimum indent, so e.g. set to 1 from a header `|2+`. Set to -1 if not
- * explicitly set.
- */
- this.blockScalarIndent = -1;
- /**
- * Block scalars that include a + (keep) chomping indicator in their header
- * include trailing empty lines, which are otherwise excluded from the
- * scalar's contents.
- */
- this.blockScalarKeep = false;
- /** Current input */
- this.buffer = '';
- /**
- * Flag noting whether the map value indicator : can immediately follow this
- * node within a flow context.
- */
- this.flowKey = false;
- /** Count of surrounding flow collection levels. */
- this.flowLevel = 0;
- /**
- * Minimum level of indentation required for next lines to be parsed as a
- * part of the current scalar value.
- */
- this.indentNext = 0;
- /** Indentation level of the current line. */
- this.indentValue = 0;
- /** Position of the next \n character. */
- this.lineEndPos = null;
- /** Stores the state of the lexer if reaching the end of incpomplete input */
- this.next = null;
- /** A pointer to `buffer`; the current position of the lexer. */
- this.pos = 0;
- }
- /**
- * Generate YAML tokens from the `source` string. If `incomplete`,
- * a part of the last line may be left as a buffer for the next call.
- *
- * @returns A generator of lexical tokens
- */
- *lex(source, incomplete = false) {
- if (source) {
- if (typeof source !== 'string')
- throw TypeError('source is not a string');
- this.buffer = this.buffer ? this.buffer + source : source;
- this.lineEndPos = null;
- }
- this.atEnd = !incomplete;
- let next = this.next ?? 'stream';
- while (next && (incomplete || this.hasChars(1)))
- next = yield* this.parseNext(next);
- }
- atLineEnd() {
- let i = this.pos;
- let ch = this.buffer[i];
- while (ch === ' ' || ch === '\t')
- ch = this.buffer[++i];
- if (!ch || ch === '#' || ch === '\n')
- return true;
- if (ch === '\r')
- return this.buffer[i + 1] === '\n';
- return false;
- }
- charAt(n) {
- return this.buffer[this.pos + n];
- }
- continueScalar(offset) {
- let ch = this.buffer[offset];
- if (this.indentNext > 0) {
- let indent = 0;
- while (ch === ' ')
- ch = this.buffer[++indent + offset];
- if (ch === '\r') {
- const next = this.buffer[indent + offset + 1];
- if (next === '\n' || (!next && !this.atEnd))
- return offset + indent + 1;
- }
- return ch === '\n' || indent >= this.indentNext || (!ch && !this.atEnd)
- ? offset + indent
- : -1;
- }
- if (ch === '-' || ch === '.') {
- const dt = this.buffer.substr(offset, 3);
- if ((dt === '---' || dt === '...') && isEmpty(this.buffer[offset + 3]))
- return -1;
- }
- return offset;
- }
- getLine() {
- let end = this.lineEndPos;
- if (typeof end !== 'number' || (end !== -1 && end < this.pos)) {
- end = this.buffer.indexOf('\n', this.pos);
- this.lineEndPos = end;
- }
- if (end === -1)
- return this.atEnd ? this.buffer.substring(this.pos) : null;
- if (this.buffer[end - 1] === '\r')
- end -= 1;
- return this.buffer.substring(this.pos, end);
- }
- hasChars(n) {
- return this.pos + n <= this.buffer.length;
- }
- setNext(state) {
- this.buffer = this.buffer.substring(this.pos);
- this.pos = 0;
- this.lineEndPos = null;
- this.next = state;
- return null;
- }
- peek(n) {
- return this.buffer.substr(this.pos, n);
- }
- *parseNext(next) {
- switch (next) {
- case 'stream':
- return yield* this.parseStream();
- case 'line-start':
- return yield* this.parseLineStart();
- case 'block-start':
- return yield* this.parseBlockStart();
- case 'doc':
- return yield* this.parseDocument();
- case 'flow':
- return yield* this.parseFlowCollection();
- case 'quoted-scalar':
- return yield* this.parseQuotedScalar();
- case 'block-scalar':
- return yield* this.parseBlockScalar();
- case 'plain-scalar':
- return yield* this.parsePlainScalar();
- }
- }
- *parseStream() {
- let line = this.getLine();
- if (line === null)
- return this.setNext('stream');
- if (line[0] === BOM) {
- yield* this.pushCount(1);
- line = line.substring(1);
- }
- if (line[0] === '%') {
- let dirEnd = line.length;
- let cs = line.indexOf('#');
- while (cs !== -1) {
- const ch = line[cs - 1];
- if (ch === ' ' || ch === '\t') {
- dirEnd = cs - 1;
- break;
- }
- else {
- cs = line.indexOf('#', cs + 1);
- }
- }
- while (true) {
- const ch = line[dirEnd - 1];
- if (ch === ' ' || ch === '\t')
- dirEnd -= 1;
- else
- break;
- }
- const n = (yield* this.pushCount(dirEnd)) + (yield* this.pushSpaces(true));
- yield* this.pushCount(line.length - n); // possible comment
- this.pushNewline();
- return 'stream';
- }
- if (this.atLineEnd()) {
- const sp = yield* this.pushSpaces(true);
- yield* this.pushCount(line.length - sp);
- yield* this.pushNewline();
- return 'stream';
- }
- yield DOCUMENT;
- return yield* this.parseLineStart();
- }
- *parseLineStart() {
- const ch = this.charAt(0);
- if (!ch && !this.atEnd)
- return this.setNext('line-start');
- if (ch === '-' || ch === '.') {
- if (!this.atEnd && !this.hasChars(4))
- return this.setNext('line-start');
- const s = this.peek(3);
- if ((s === '---' || s === '...') && isEmpty(this.charAt(3))) {
- yield* this.pushCount(3);
- this.indentValue = 0;
- this.indentNext = 0;
- return s === '---' ? 'doc' : 'stream';
- }
- }
- this.indentValue = yield* this.pushSpaces(false);
- if (this.indentNext > this.indentValue && !isEmpty(this.charAt(1)))
- this.indentNext = this.indentValue;
- return yield* this.parseBlockStart();
- }
- *parseBlockStart() {
- const [ch0, ch1] = this.peek(2);
- if (!ch1 && !this.atEnd)
- return this.setNext('block-start');
- if ((ch0 === '-' || ch0 === '?' || ch0 === ':') && isEmpty(ch1)) {
- const n = (yield* this.pushCount(1)) + (yield* this.pushSpaces(true));
- this.indentNext = this.indentValue + 1;
- this.indentValue += n;
- return yield* this.parseBlockStart();
- }
- return 'doc';
- }
- *parseDocument() {
- yield* this.pushSpaces(true);
- const line = this.getLine();
- if (line === null)
- return this.setNext('doc');
- let n = yield* this.pushIndicators();
- switch (line[n]) {
- case '#':
- yield* this.pushCount(line.length - n);
- // fallthrough
- case undefined:
- yield* this.pushNewline();
- return yield* this.parseLineStart();
- case '{':
- case '[':
- yield* this.pushCount(1);
- this.flowKey = false;
- this.flowLevel = 1;
- return 'flow';
- case '}':
- case ']':
- // this is an error
- yield* this.pushCount(1);
- return 'doc';
- case '*':
- yield* this.pushUntil(isNotAnchorChar);
- return 'doc';
- case '"':
- case "'":
- return yield* this.parseQuotedScalar();
- case '|':
- case '>':
- n += yield* this.parseBlockScalarHeader();
- n += yield* this.pushSpaces(true);
- yield* this.pushCount(line.length - n);
- yield* this.pushNewline();
- return yield* this.parseBlockScalar();
- default:
- return yield* this.parsePlainScalar();
- }
- }
- *parseFlowCollection() {
- let nl, sp;
- let indent = -1;
- do {
- nl = yield* this.pushNewline();
- if (nl > 0) {
- sp = yield* this.pushSpaces(false);
- this.indentValue = indent = sp;
- }
- else {
- sp = 0;
- }
- sp += yield* this.pushSpaces(true);
- } while (nl + sp > 0);
- const line = this.getLine();
- if (line === null)
- return this.setNext('flow');
- if ((indent !== -1 && indent < this.indentNext && line[0] !== '#') ||
- (indent === 0 &&
- (line.startsWith('---') || line.startsWith('...')) &&
- isEmpty(line[3]))) {
- // Allowing for the terminal ] or } at the same (rather than greater)
- // indent level as the initial [ or { is technically invalid, but
- // failing here would be surprising to users.
- const atFlowEndMarker = indent === this.indentNext - 1 &&
- this.flowLevel === 1 &&
- (line[0] === ']' || line[0] === '}');
- if (!atFlowEndMarker) {
- // this is an error
- this.flowLevel = 0;
- yield FLOW_END;
- return yield* this.parseLineStart();
- }
- }
- let n = 0;
- while (line[n] === ',') {
- n += yield* this.pushCount(1);
- n += yield* this.pushSpaces(true);
- this.flowKey = false;
- }
- n += yield* this.pushIndicators();
- switch (line[n]) {
- case undefined:
- return 'flow';
- case '#':
- yield* this.pushCount(line.length - n);
- return 'flow';
- case '{':
- case '[':
- yield* this.pushCount(1);
- this.flowKey = false;
- this.flowLevel += 1;
- return 'flow';
- case '}':
- case ']':
- yield* this.pushCount(1);
- this.flowKey = true;
- this.flowLevel -= 1;
- return this.flowLevel ? 'flow' : 'doc';
- case '*':
- yield* this.pushUntil(isNotAnchorChar);
- return 'flow';
- case '"':
- case "'":
- this.flowKey = true;
- return yield* this.parseQuotedScalar();
- case ':': {
- const next = this.charAt(1);
- if (this.flowKey || isEmpty(next) || next === ',') {
- this.flowKey = false;
- yield* this.pushCount(1);
- yield* this.pushSpaces(true);
- return 'flow';
- }
- }
- // fallthrough
- default:
- this.flowKey = false;
- return yield* this.parsePlainScalar();
- }
- }
- *parseQuotedScalar() {
- const quote = this.charAt(0);
- let end = this.buffer.indexOf(quote, this.pos + 1);
- if (quote === "'") {
- while (end !== -1 && this.buffer[end + 1] === "'")
- end = this.buffer.indexOf("'", end + 2);
- }
- else {
- // double-quote
- while (end !== -1) {
- let n = 0;
- while (this.buffer[end - 1 - n] === '\\')
- n += 1;
- if (n % 2 === 0)
- break;
- end = this.buffer.indexOf('"', end + 1);
- }
- }
- // Only looking for newlines within the quotes
- const qb = this.buffer.substring(0, end);
- let nl = qb.indexOf('\n', this.pos);
- if (nl !== -1) {
- while (nl !== -1) {
- const cs = this.continueScalar(nl + 1);
- if (cs === -1)
- break;
- nl = qb.indexOf('\n', cs);
- }
- if (nl !== -1) {
- // this is an error caused by an unexpected unindent
- end = nl - (qb[nl - 1] === '\r' ? 2 : 1);
- }
- }
- if (end === -1) {
- if (!this.atEnd)
- return this.setNext('quoted-scalar');
- end = this.buffer.length;
- }
- yield* this.pushToIndex(end + 1, false);
- return this.flowLevel ? 'flow' : 'doc';
- }
- *parseBlockScalarHeader() {
- this.blockScalarIndent = -1;
- this.blockScalarKeep = false;
- let i = this.pos;
- while (true) {
- const ch = this.buffer[++i];
- if (ch === '+')
- this.blockScalarKeep = true;
- else if (ch > '0' && ch <= '9')
- this.blockScalarIndent = Number(ch) - 1;
- else if (ch !== '-')
- break;
- }
- return yield* this.pushUntil(ch => isEmpty(ch) || ch === '#');
- }
- *parseBlockScalar() {
- let nl = this.pos - 1; // may be -1 if this.pos === 0
- let indent = 0;
- let ch;
- loop: for (let i = this.pos; (ch = this.buffer[i]); ++i) {
- switch (ch) {
- case ' ':
- indent += 1;
- break;
- case '\n':
- nl = i;
- indent = 0;
- break;
- case '\r': {
- const next = this.buffer[i + 1];
- if (!next && !this.atEnd)
- return this.setNext('block-scalar');
- if (next === '\n')
- break;
- } // fallthrough
- default:
- break loop;
- }
- }
- if (!ch && !this.atEnd)
- return this.setNext('block-scalar');
- if (indent >= this.indentNext) {
- if (this.blockScalarIndent === -1)
- this.indentNext = indent;
- else {
- this.indentNext =
- this.blockScalarIndent + (this.indentNext === 0 ? 1 : this.indentNext);
- }
- do {
- const cs = this.continueScalar(nl + 1);
- if (cs === -1)
- break;
- nl = this.buffer.indexOf('\n', cs);
- } while (nl !== -1);
- if (nl === -1) {
- if (!this.atEnd)
- return this.setNext('block-scalar');
- nl = this.buffer.length;
- }
- }
- // Trailing insufficiently indented tabs are invalid.
- // To catch that during parsing, we include them in the block scalar value.
- let i = nl + 1;
- ch = this.buffer[i];
- while (ch === ' ')
- ch = this.buffer[++i];
- if (ch === '\t') {
- while (ch === '\t' || ch === ' ' || ch === '\r' || ch === '\n')
- ch = this.buffer[++i];
- nl = i - 1;
- }
- else if (!this.blockScalarKeep) {
- do {
- let i = nl - 1;
- let ch = this.buffer[i];
- if (ch === '\r')
- ch = this.buffer[--i];
- const lastChar = i; // Drop the line if last char not more indented
- while (ch === ' ')
- ch = this.buffer[--i];
- if (ch === '\n' && i >= this.pos && i + 1 + indent > lastChar)
- nl = i;
- else
- break;
- } while (true);
- }
- yield SCALAR;
- yield* this.pushToIndex(nl + 1, true);
- return yield* this.parseLineStart();
- }
- *parsePlainScalar() {
- const inFlow = this.flowLevel > 0;
- let end = this.pos - 1;
- let i = this.pos - 1;
- let ch;
- while ((ch = this.buffer[++i])) {
- if (ch === ':') {
- const next = this.buffer[i + 1];
- if (isEmpty(next) || (inFlow && flowIndicatorChars.has(next)))
- break;
- end = i;
- }
- else if (isEmpty(ch)) {
- let next = this.buffer[i + 1];
- if (ch === '\r') {
- if (next === '\n') {
- i += 1;
- ch = '\n';
- next = this.buffer[i + 1];
- }
- else
- end = i;
- }
- if (next === '#' || (inFlow && flowIndicatorChars.has(next)))
- break;
- if (ch === '\n') {
- const cs = this.continueScalar(i + 1);
- if (cs === -1)
- break;
- i = Math.max(i, cs - 2); // to advance, but still account for ' #'
- }
- }
- else {
- if (inFlow && flowIndicatorChars.has(ch))
- break;
- end = i;
- }
- }
- if (!ch && !this.atEnd)
- return this.setNext('plain-scalar');
- yield SCALAR;
- yield* this.pushToIndex(end + 1, true);
- return inFlow ? 'flow' : 'doc';
- }
- *pushCount(n) {
- if (n > 0) {
- yield this.buffer.substr(this.pos, n);
- this.pos += n;
- return n;
- }
- return 0;
- }
- *pushToIndex(i, allowEmpty) {
- const s = this.buffer.slice(this.pos, i);
- if (s) {
- yield s;
- this.pos += s.length;
- return s.length;
- }
- else if (allowEmpty)
- yield '';
- return 0;
- }
- *pushIndicators() {
- switch (this.charAt(0)) {
- case '!':
- return ((yield* this.pushTag()) +
- (yield* this.pushSpaces(true)) +
- (yield* this.pushIndicators()));
- case '&':
- return ((yield* this.pushUntil(isNotAnchorChar)) +
- (yield* this.pushSpaces(true)) +
- (yield* this.pushIndicators()));
- case '-': // this is an error
- case '?': // this is an error outside flow collections
- case ':': {
- const inFlow = this.flowLevel > 0;
- const ch1 = this.charAt(1);
- if (isEmpty(ch1) || (inFlow && flowIndicatorChars.has(ch1))) {
- if (!inFlow)
- this.indentNext = this.indentValue + 1;
- else if (this.flowKey)
- this.flowKey = false;
- return ((yield* this.pushCount(1)) +
- (yield* this.pushSpaces(true)) +
- (yield* this.pushIndicators()));
- }
- }
- }
- return 0;
- }
- *pushTag() {
- if (this.charAt(1) === '<') {
- let i = this.pos + 2;
- let ch = this.buffer[i];
- while (!isEmpty(ch) && ch !== '>')
- ch = this.buffer[++i];
- return yield* this.pushToIndex(ch === '>' ? i + 1 : i, false);
- }
- else {
- let i = this.pos + 1;
- let ch = this.buffer[i];
- while (ch) {
- if (tagChars.has(ch))
- ch = this.buffer[++i];
- else if (ch === '%' &&
- hexDigits.has(this.buffer[i + 1]) &&
- hexDigits.has(this.buffer[i + 2])) {
- ch = this.buffer[(i += 3)];
- }
- else
- break;
- }
- return yield* this.pushToIndex(i, false);
- }
- }
- *pushNewline() {
- const ch = this.buffer[this.pos];
- if (ch === '\n')
- return yield* this.pushCount(1);
- else if (ch === '\r' && this.charAt(1) === '\n')
- return yield* this.pushCount(2);
- else
- return 0;
- }
- *pushSpaces(allowTabs) {
- let i = this.pos - 1;
- let ch;
- do {
- ch = this.buffer[++i];
- } while (ch === ' ' || (allowTabs && ch === '\t'));
- const n = i - this.pos;
- if (n > 0) {
- yield this.buffer.substr(this.pos, n);
- this.pos = i;
- }
- return n;
- }
- *pushUntil(test) {
- let i = this.pos;
- let ch = this.buffer[i];
- while (!test(ch))
- ch = this.buffer[++i];
- return yield* this.pushToIndex(i, false);
- }
- }
- export { Lexer };
|