123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274 |
- /*
- didYouMean.js - A simple JavaScript matching engine
- ===================================================
- [Available on GitHub](https://github.com/dcporter/didyoumean.js).
- A super-simple, highly optimized JS library for matching human-quality input to a list of potential
- matches. You can use it to suggest a misspelled command-line utility option to a user, or to offer
- links to nearby valid URLs on your 404 page. (The examples below are taken from a personal project,
- my [HTML5 business card](http://dcporter.aws.af.cm/me), which uses didYouMean.js to suggest correct
- URLs from misspelled ones, such as [dcporter.aws.af.cm/me/instagarm](http://dcporter.aws.af.cm/me/instagarm).)
- Uses the [Levenshtein distance algorithm](https://en.wikipedia.org/wiki/Levenshtein_distance).
- didYouMean.js works in the browser as well as in node.js. To install it for use in node:
- ```
- npm install didyoumean
- ```
- Examples
- --------
- Matching against a list of strings:
- ```
- var input = 'insargrm'
- var list = ['facebook', 'twitter', 'instagram', 'linkedin'];
- console.log(didYouMean(input, list));
- > 'instagram'
- // The method matches 'insargrm' to 'instagram'.
- input = 'google plus';
- console.log(didYouMean(input, list));
- > null
- // The method was unable to find 'google plus' in the list of options.
- ```
- Matching against a list of objects:
- ```
- var input = 'insargrm';
- var list = [ { id: 'facebook' }, { id: 'twitter' }, { id: 'instagram' }, { id: 'linkedin' } ];
- var key = 'id';
- console.log(didYouMean(input, list, key));
- > 'instagram'
- // The method returns the matching value.
- didYouMean.returnWinningObject = true;
- console.log(didYouMean(input, list, key));
- > { id: 'instagram' }
- // The method returns the matching object.
- ```
- didYouMean(str, list, [key])
- ----------------------------
- - str: The string input to match.
- - list: An array of strings or objects to match against.
- - key (OPTIONAL): If your list array contains objects, you must specify the key which contains the string
- to match against.
- Returns: the closest matching string, or null if no strings exceed the threshold.
- Options
- -------
- Options are set on the didYouMean function object. You may change them at any time.
- ### threshold
- By default, the method will only return strings whose edit distance is less than 40% (0.4x) of their length.
- For example, if a ten-letter string is five edits away from its nearest match, the method will return null.
- You can control this by setting the "threshold" value on the didYouMean function. For example, to set the
- edit distance threshold to 50% of the input string's length:
- ```
- didYouMean.threshold = 0.5;
- ```
- To return the nearest match no matter the threshold, set this value to null.
- ### thresholdAbsolute
- This option behaves the same as threshold, but instead takes an integer number of edit steps. For example,
- if thresholdAbsolute is set to 20 (the default), then the method will only return strings whose edit distance
- is less than 20. Both options apply.
- ### caseSensitive
- By default, the method will perform case-insensitive comparisons. If you wish to force case sensitivity, set
- the "caseSensitive" value to true:
- ```
- didYouMean.caseSensitive = true;
- ```
- ### nullResultValue
- By default, the method will return null if there is no sufficiently close match. You can change this value here.
- ### returnWinningObject
- By default, the method will return the winning string value (if any). If your list contains objects rather
- than strings, you may set returnWinningObject to true.
-
- ```
- didYouMean.returnWinningObject = true;
- ```
-
- This option has no effect on lists of strings.
- ### returnFirstMatch
-
- By default, the method will search all values and return the closest match. If you're simply looking for a "good-
- enough" match, you can set your thresholds appropriately and set returnFirstMatch to true to substantially speed
- things up.
- License
- -------
- didYouMean copyright (c) 2013-2014 Dave Porter.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License
- [here](http://www.apache.org/licenses/LICENSE-2.0).
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */
- (function() {
- "use strict";
- // The didYouMean method.
- function didYouMean(str, list, key) {
- if (!str) return null;
- // If we're running a case-insensitive search, smallify str.
- if (!didYouMean.caseSensitive) { str = str.toLowerCase(); }
- // Calculate the initial value (the threshold) if present.
- var thresholdRelative = didYouMean.threshold === null ? null : didYouMean.threshold * str.length,
- thresholdAbsolute = didYouMean.thresholdAbsolute,
- winningVal;
- if (thresholdRelative !== null && thresholdAbsolute !== null) winningVal = Math.min(thresholdRelative, thresholdAbsolute);
- else if (thresholdRelative !== null) winningVal = thresholdRelative;
- else if (thresholdAbsolute !== null) winningVal = thresholdAbsolute;
- else winningVal = null;
- // Get the edit distance to each option. If the closest one is less than 40% (by default) of str's length,
- // then return it.
- var winner, candidate, testCandidate, val,
- i, len = list.length;
- for (i = 0; i < len; i++) {
- // Get item.
- candidate = list[i];
- // If there's a key, get the candidate value out of the object.
- if (key) { candidate = candidate[key]; }
- // Gatekeep.
- if (!candidate) { continue; }
- // If we're running a case-insensitive search, smallify the candidate.
- if (!didYouMean.caseSensitive) { testCandidate = candidate.toLowerCase(); }
- else { testCandidate = candidate; }
- // Get and compare edit distance.
- val = getEditDistance(str, testCandidate, winningVal);
- // If this value is smaller than our current winning value, OR if we have no winning val yet (i.e. the
- // threshold option is set to null, meaning the caller wants a match back no matter how bad it is), then
- // this is our new winner.
- if (winningVal === null || val < winningVal) {
- winningVal = val;
- // Set the winner to either the value or its object, depending on the returnWinningObject option.
- if (key && didYouMean.returnWinningObject) winner = list[i];
- else winner = candidate;
- // If we're returning the first match, return it now.
- if (didYouMean.returnFirstMatch) return winner;
- }
- }
- // If we have a winner, return it.
- return winner || didYouMean.nullResultValue;
- }
- // Set default options.
- didYouMean.threshold = 0.4;
- didYouMean.thresholdAbsolute = 20;
- didYouMean.caseSensitive = false;
- didYouMean.nullResultValue = null;
- didYouMean.returnWinningObject = null;
- didYouMean.returnFirstMatch = false;
- // Expose.
- // In node...
- if (typeof module !== 'undefined' && module.exports) {
- module.exports = didYouMean;
- }
- // Otherwise...
- else {
- window.didYouMean = didYouMean;
- }
- var MAX_INT = Math.pow(2,32) - 1; // We could probably go higher than this, but for practical reasons let's not.
- function getEditDistance(a, b, max) {
- // Handle null or undefined max.
- max = max || max === 0 ? max : MAX_INT;
- var lena = a.length;
- var lenb = b.length;
- // Fast path - no A or B.
- if (lena === 0) return Math.min(max + 1, lenb);
- if (lenb === 0) return Math.min(max + 1, lena);
- // Fast path - length diff larger than max.
- if (Math.abs(lena - lenb) > max) return max + 1;
- // Slow path.
- var matrix = [],
- i, j, colMin, minJ, maxJ;
- // Set up the first row ([0, 1, 2, 3, etc]).
- for (i = 0; i <= lenb; i++) { matrix[i] = [i]; }
- // Set up the first column (same).
- for (j = 0; j <= lena; j++) { matrix[0][j] = j; }
- // Loop over the rest of the columns.
- for (i = 1; i <= lenb; i++) {
- colMin = MAX_INT;
- minJ = 1;
- if (i > max) minJ = i - max;
- maxJ = lenb + 1;
- if (maxJ > max + i) maxJ = max + i;
- // Loop over the rest of the rows.
- for (j = 1; j <= lena; j++) {
- // If j is out of bounds, just put a large value in the slot.
- if (j < minJ || j > maxJ) {
- matrix[i][j] = max + 1;
- }
- // Otherwise do the normal Levenshtein thing.
- else {
- // If the characters are the same, there's no change in edit distance.
- if (b.charAt(i - 1) === a.charAt(j - 1)) {
- matrix[i][j] = matrix[i - 1][j - 1];
- }
- // Otherwise, see if we're substituting, inserting or deleting.
- else {
- matrix[i][j] = Math.min(matrix[i - 1][j - 1] + 1, // Substitute
- Math.min(matrix[i][j - 1] + 1, // Insert
- matrix[i - 1][j] + 1)); // Delete
- }
- }
- // Either way, update colMin.
- if (matrix[i][j] < colMin) colMin = matrix[i][j];
- }
- // If this column's minimum is greater than the allowed maximum, there's no point
- // in going on with life.
- if (colMin > max) return max + 1;
- }
- // If we made it this far without running into the max, then return the final matrix value.
- return matrix[lenb][lena];
- }
- })();
|