123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537 |
- /**
- * @author Toru Nagashima <https://github.com/mysticatea>
- */
- "use strict";
- const {
- CALL,
- CONSTRUCT,
- ReferenceTracker,
- getStaticValue,
- getStringIfConstant
- } = require("@eslint-community/eslint-utils");
- const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp");
- const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode");
- const astUtils = require("./utils/ast-utils.js");
- const { isValidWithUnicodeFlag } = require("./utils/regular-expressions");
- const { parseStringLiteral, parseTemplateToken } = require("./utils/char-source");
- //------------------------------------------------------------------------------
- // Helpers
- //------------------------------------------------------------------------------
- /**
- * @typedef {import('@eslint-community/regexpp').AST.Character} Character
- * @typedef {import('@eslint-community/regexpp').AST.CharacterClassElement} CharacterClassElement
- */
/**
 * Splits the elements of a character class into runs of adjacent characters.
 *
 * A `CharacterClassRange` absorbs the characters on both of its sides, so its
 * `min` is treated as the last character of the current run and its `max` as
 * the first character of the next run.
 * Character sets, nested classes, string disjunctions and expression classes
 * terminate the current run without contributing characters to it.
 * Any other element type is ignored.
 * @param {CharacterClassElement[]} nodes The character class elements to split.
 * @returns {IterableIterator<Character[]>} The runs of adjacent characters.
 */
function *iterateCharacterSequence(nodes) {

    // Element types that end the current run without adding a character.
    const boundaryTypes = new Set([
        "CharacterSet",
        "CharacterClass", // [[]] nesting character class
        "ClassStringDisjunction", // \q{...}
        "ExpressionCharacterClass" // [A--B]
    ]);

    /** @type {Character[]} */
    let pending = [];

    for (const element of nodes) {
        const { type } = element;

        if (type === "Character") {
            pending.push(element);
        } else if (type === "CharacterClassRange") {

            // The range's lower bound closes the current run (yielded even if it is the only item).
            pending.push(element.min);
            yield pending;

            // The range's upper bound opens the next run.
            pending = [element.max];
        } else if (boundaryTypes.has(type) && pending.length > 0) {
            yield pending;
            pending = [];
        }
    }

    if (pending.length > 0) {
        yield pending;
    }
}
/**
 * Tells whether a character node was written as a Unicode code point escape,
 * i.e. its raw text is exactly `\u{...}` with one or more hexadecimal digits.
 * @param {Character} char The character node whose `raw` text is inspected.
 * @returns {boolean} `true` if the raw text is a `\u{...}` escape.
 */
function isUnicodeCodePointEscape(char) {
    const codePointEscapePattern = /^\\u\{[\da-f]+\}$/iu;
    const { raw } = char;

    return codePointEscapePattern.test(raw);
}
/**
 * Walks two parallel character lists and yields every `[previous, current]` pair
 * of neighbours in which both entries are present (neither `null` nor missing).
 * @param {(Character|null)[]} chars The list the current character is taken from.
 * @param {(Character|null)[]} [previousChars] The list the preceding character is taken from. Defaults to `chars`.
 * @returns {IterableIterator<Character[]>} The pairs of adjacent characters.
 */
function *iterateAdjacentPairs(chars, previousChars = chars) {
    for (let index = 1; index < chars.length; index += 1) {
        const previous = previousChars[index - 1];
        const char = chars[index];

        if (previous && char) {
            yield [previous, char];
        }
    }
}

/**
 * Detectors keyed by message ID. Each one yields the characters that form a problem of its kind.
 * Detectors receive the list of characters to check (entries may be `null`) and,
 * as a second argument, the same list without any entries replaced by `null`.
 * Only `combiningClass` reads the second argument.
 * @type {Record<string, (chars: (Character|null)[], unfilteredChars?: Character[]) => IterableIterator<Character[]>>}
 */
const findCharacterSequences = {

    // A surrogate pair where neither half is written as `\u{...}`.
    *surrogatePairWithoutUFlag(chars) {
        for (const pair of iterateAdjacentPairs(chars)) {
            const [previous, char] = pair;

            if (
                isSurrogatePair(previous.value, char.value) &&
                !isUnicodeCodePointEscape(previous) &&
                !isUnicodeCodePointEscape(char)
            ) {
                yield pair;
            }
        }
    },

    // A surrogate pair where at least one half is written as `\u{...}`.
    *surrogatePair(chars) {
        for (const pair of iterateAdjacentPairs(chars)) {
            const [previous, char] = pair;

            if (
                isSurrogatePair(previous.value, char.value) &&
                (isUnicodeCodePointEscape(previous) || isUnicodeCodePointEscape(char))
            ) {
                yield pair;
            }
        }
    },

    /*
     * A combining character that follows a non-combining one.
     * The base character is read from `unfilteredChars`: with `allowEscape` enabled,
     * only an escaped combining mark makes the combination acceptable, so an escaped
     * base character must still count as a base.
     */
    *combiningClass(chars, unfilteredChars) {
        for (const pair of iterateAdjacentPairs(chars, unfilteredChars)) {
            const [previous, char] = pair;

            if (isCombiningCharacter(char.value) && !isCombiningCharacter(previous.value)) {
                yield pair;
            }
        }
    },

    // An emoji modifier that follows something that is not an emoji modifier.
    *emojiModifier(chars) {
        for (const pair of iterateAdjacentPairs(chars)) {
            const [previous, char] = pair;

            if (isEmojiModifier(char.value) && !isEmojiModifier(previous.value)) {
                yield pair;
            }
        }
    },

    // Two regional indicator symbols in a row.
    *regionalIndicatorSymbol(chars) {
        for (const pair of iterateAdjacentPairs(chars)) {
            const [previous, char] = pair;

            if (isRegionalIndicatorSymbol(char.value) && isRegionalIndicatorSymbol(previous.value)) {
                yield pair;
            }
        }
    },

    // Characters glued together by U+200D; chained joins are reported as a single sequence.
    *zwj(chars) {
        const ZWJ = 0x200d;
        let run = null;

        // A joiner needs a neighbour on both sides, so the first and last index can never match.
        for (let index = 1; index < chars.length - 1; index += 1) {
            const before = chars[index - 1];
            const joiner = chars[index];
            const after = chars[index + 1];
            const isJoin =
                before && joiner && after &&
                joiner.value === ZWJ &&
                before.value !== ZWJ &&
                after.value !== ZWJ;

            if (!isJoin) {
                continue;
            }

            if (run && run.at(-1) === before) {
                run.push(joiner, after); // the join continues the current sequence
                continue;
            }

            if (run) {
                yield run;
            }
            run = [before, joiner, after];
        }

        if (run) {
            yield run;
        }
    }
};

const kinds = Object.keys(findCharacterSequences);
/**
 * Resolves a node to either its `regex` property (for regular expression literals)
 * or its static value, as long as that value is not a regular expression object.
 *
 * This stands in for `getStaticValue` in environments where certain regular expressions cannot be evaluated.
 * A known example is Node.js 18, which does not support the `v` flag: calling `getStaticValue`
 * on a regular expression node with the `v` flag there always returns `null`.
 * The limitation is that a regular expression is only recognized when the given node
 * is itself a regular expression literal.
 * @param {ASTNode | undefined} node The node to be inspected.
 * @param {Scope} initialScope Scope to start finding variables. Identifier references in this scope are resolved.
 * @returns {{ value: any } | { regex: { pattern: string, flags: string } } | null} The static value of the node, or `null`.
 */
function getStaticValueOrRegex(node, initialScope) {
    if (!node) {
        return null;
    }

    const { type, regex } = node;

    // Regular expression literals are described by their source, never evaluated.
    if (type === "Literal" && regex) {
        return { regex };
    }

    const evaluated = getStaticValue(node, initialScope);
    const isRegExpObject = evaluated?.value instanceof RegExp;

    // A RegExp reached indirectly (not as a literal node) is reported as unknown.
    return isRegExpObject ? null : evaluated;
}
/**
 * Decides whether a regexpp character, given its source text, is written as an acceptable escape sequence.
 * Source text that does not begin with a backslash is never acceptable.
 * Source text made of backslashes followed by the very character it stands for is not acceptable either;
 * any other backslash-led source text is.
 * @param {Character} char Character to check.
 * @param {string} charSource Source text of the character to check.
 * @returns {boolean} Whether the character is represented as an acceptable escape sequence.
 */
function checkForAcceptableEscape(char, charSource) {
    const isEscaped = charSource.startsWith("\\");

    if (!isEscaped) {
        return false;
    }

    // Matches the single trailing character of a source that is otherwise only leading backslashes.
    const trailingCharPattern = /(?<=^\\+).$/su;
    const trailing = trailingCharPattern.exec(charSource);
    const literalChar = String.fromCodePoint(char.value);

    if (trailing === null) {
        return true;
    }
    return trailing[0] !== literalChar;
}
/**
 * Decides whether a regexpp character produced by a string or template literal
 * is written as an acceptable escape sequence.
 * The character's source text is cut out of the literal's source using the CodeUnit entries
 * at the character's first and last code unit, then handed to `checkForAcceptableEscape`.
 * @param {Character} char Character to check.
 * @param {string} nodeSource Source text of the string or template literal that produces the character.
 * @param {CodeUnit[]} codeUnits List of CodeUnit objects of the literal that produces the character.
 * @returns {boolean} Whether the character is represented as an acceptable escape sequence.
 */
function checkForAcceptableEscapeInString(char, nodeSource, codeUnits) {

    // `char.start`/`char.end` index into `codeUnits`; `char.end` is exclusive.
    const { start } = codeUnits[char.start];
    const { end } = codeUnits[char.end - 1];

    return checkForAcceptableEscape(char, nodeSource.slice(start, end));
}
- //------------------------------------------------------------------------------
- // Rule Definition
- //------------------------------------------------------------------------------
- /** @type {import('../shared/types').Rule} */
- module.exports = {
- meta: {
- type: "problem",
- docs: {
- description: "Disallow characters which are made with multiple code points in character class syntax",
- recommended: true,
- url: "https://eslint.org/docs/latest/rules/no-misleading-character-class"
- },
- hasSuggestions: true,
- schema: [
- {
- type: "object",
- properties: {
- allowEscape: {
- type: "boolean",
- default: false
- }
- },
- additionalProperties: false
- }
- ],
- messages: {
- surrogatePairWithoutUFlag: "Unexpected surrogate pair in character class. Use 'u' flag.",
- surrogatePair: "Unexpected surrogate pair in character class.",
- combiningClass: "Unexpected combined character in character class.",
- emojiModifier: "Unexpected modified Emoji in character class.",
- regionalIndicatorSymbol: "Unexpected national flag in character class.",
- zwj: "Unexpected joined character sequence in character class.",
- suggestUnicodeFlag: "Add unicode 'u' flag to regex."
- }
- },
- create(context) {
- const allowEscape = context.options[0]?.allowEscape;
- const sourceCode = context.sourceCode;
- const parser = new RegExpParser();
- const checkedPatternNodes = new Set();
- /**
- * Verify a given regular expression.
- * @param {Node} node The node to report.
- * @param {string} pattern The regular expression pattern to verify.
- * @param {string} flags The flags of the regular expression.
- * @param {Function} unicodeFixer Fixer for missing "u" flag.
- * @returns {void}
- */
- function verify(node, pattern, flags, unicodeFixer) {
- let patternNode;
- try {
- patternNode = parser.parsePattern(
- pattern,
- 0,
- pattern.length,
- {
- unicode: flags.includes("u"),
- unicodeSets: flags.includes("v")
- }
- );
- } catch {
- // Ignore regular expressions with syntax errors
- return;
- }
- let codeUnits = null;
- /**
- * Checks whether a specified regexpp character is represented as an acceptable escape sequence.
- * For the purposes of this rule, an escape sequence is considered acceptable if it consists of one or more backslashes followed by the character being escaped.
- * @param {Character} char Character to check.
- * @returns {boolean} Whether the specified regexpp character is represented as an acceptable escape sequence.
- */
- function isAcceptableEscapeSequence(char) {
- if (node.type === "Literal" && node.regex) {
- return checkForAcceptableEscape(char, char.raw);
- }
- if (node.type === "Literal" && typeof node.value === "string") {
- const nodeSource = node.raw;
- codeUnits ??= parseStringLiteral(nodeSource);
- return checkForAcceptableEscapeInString(char, nodeSource, codeUnits);
- }
- if (astUtils.isStaticTemplateLiteral(node)) {
- const nodeSource = sourceCode.getText(node);
- codeUnits ??= parseTemplateToken(nodeSource);
- return checkForAcceptableEscapeInString(char, nodeSource, codeUnits);
- }
- return false;
- }
- const foundKindMatches = new Map();
- visitRegExpAST(patternNode, {
- onCharacterClassEnter(ccNode) {
- for (const unfilteredChars of iterateCharacterSequence(ccNode.elements)) {
- let chars;
- if (allowEscape) {
- // Replace escape sequences with null to avoid having them flagged.
- chars = unfilteredChars.map(char => (isAcceptableEscapeSequence(char) ? null : char));
- } else {
- chars = unfilteredChars;
- }
- for (const kind of kinds) {
- const matches = findCharacterSequences[kind](chars, unfilteredChars);
- if (foundKindMatches.has(kind)) {
- foundKindMatches.get(kind).push(...matches);
- } else {
- foundKindMatches.set(kind, [...matches]);
- }
- }
- }
- }
- });
- /**
- * Finds the report loc(s) for a range of matches.
- * Only literals and expression-less templates generate granular errors.
- * @param {Character[][]} matches Lists of individual characters being reported on.
- * @returns {Location[]} locs for context.report.
- * @see https://github.com/eslint/eslint/pull/17515
- */
- function getNodeReportLocations(matches) {
- if (!astUtils.isStaticTemplateLiteral(node) && node.type !== "Literal") {
- return matches.length ? [node.loc] : [];
- }
- return matches.map(chars => {
- const firstIndex = chars[0].start;
- const lastIndex = chars.at(-1).end - 1;
- let start;
- let end;
- if (node.type === "TemplateLiteral") {
- const source = sourceCode.getText(node);
- const offset = node.range[0];
- codeUnits ??= parseTemplateToken(source);
- start = offset + codeUnits[firstIndex].start;
- end = offset + codeUnits[lastIndex].end;
- } else if (typeof node.value === "string") { // String Literal
- const source = node.raw;
- const offset = node.range[0];
- codeUnits ??= parseStringLiteral(source);
- start = offset + codeUnits[firstIndex].start;
- end = offset + codeUnits[lastIndex].end;
- } else { // RegExp Literal
- const offset = node.range[0] + 1; // Add 1 to skip the leading slash.
- start = offset + firstIndex;
- end = offset + lastIndex + 1;
- }
- return {
- start: sourceCode.getLocFromIndex(start),
- end: sourceCode.getLocFromIndex(end)
- };
- });
- }
- for (const [kind, matches] of foundKindMatches) {
- let suggest;
- if (kind === "surrogatePairWithoutUFlag") {
- suggest = [{
- messageId: "suggestUnicodeFlag",
- fix: unicodeFixer
- }];
- }
- const locs = getNodeReportLocations(matches);
- for (const loc of locs) {
- context.report({
- node,
- loc,
- messageId: kind,
- suggest
- });
- }
- }
- }
- return {
- "Literal[regex]"(node) {
- if (checkedPatternNodes.has(node)) {
- return;
- }
- verify(node, node.regex.pattern, node.regex.flags, fixer => {
- if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, node.regex.pattern)) {
- return null;
- }
- return fixer.insertTextAfter(node, "u");
- });
- },
- "Program"(node) {
- const scope = sourceCode.getScope(node);
- const tracker = new ReferenceTracker(scope);
- /*
- * Iterate calls of RegExp.
- * E.g., `new RegExp()`, `RegExp()`, `new window.RegExp()`,
- * `const {RegExp: a} = window; new a()`, etc...
- */
- for (const { node: refNode } of tracker.iterateGlobalReferences({
- RegExp: { [CALL]: true, [CONSTRUCT]: true }
- })) {
- let pattern, flags;
- const [patternNode, flagsNode] = refNode.arguments;
- const evaluatedPattern = getStaticValueOrRegex(patternNode, scope);
- if (!evaluatedPattern) {
- continue;
- }
- if (flagsNode) {
- if (evaluatedPattern.regex) {
- pattern = evaluatedPattern.regex.pattern;
- checkedPatternNodes.add(patternNode);
- } else {
- pattern = String(evaluatedPattern.value);
- }
- flags = getStringIfConstant(flagsNode, scope);
- } else {
- if (evaluatedPattern.regex) {
- continue;
- }
- pattern = String(evaluatedPattern.value);
- flags = "";
- }
- if (typeof flags === "string") {
- verify(patternNode, pattern, flags, fixer => {
- if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, pattern)) {
- return null;
- }
- if (refNode.arguments.length === 1) {
- const penultimateToken = sourceCode.getLastToken(refNode, { skip: 1 }); // skip closing parenthesis
- return fixer.insertTextAfter(
- penultimateToken,
- astUtils.isCommaToken(penultimateToken)
- ? ' "u",'
- : ', "u"'
- );
- }
- if ((flagsNode.type === "Literal" && typeof flagsNode.value === "string") || flagsNode.type === "TemplateLiteral") {
- const range = [flagsNode.range[0], flagsNode.range[1] - 1];
- return fixer.insertTextAfterRange(range, "u");
- }
- return null;
- });
- }
- }
- }
- };
- }
- };
|