// isURL.js (10 KB)
  1. function _slicedToArray(r, e) { return _arrayWithHoles(r) || _iterableToArrayLimit(r, e) || _unsupportedIterableToArray(r, e) || _nonIterableRest(); }
  2. function _nonIterableRest() { throw new TypeError("Invalid attempt to destructure non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); }
  3. function _unsupportedIterableToArray(r, a) { if (r) { if ("string" == typeof r) return _arrayLikeToArray(r, a); var t = {}.toString.call(r).slice(8, -1); return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? Array.from(r) : "Arguments" === t || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t) ? _arrayLikeToArray(r, a) : void 0; } }
  4. function _arrayLikeToArray(r, a) { (null == a || a > r.length) && (a = r.length); for (var e = 0, n = Array(a); e < a; e++) n[e] = r[e]; return n; }
  5. function _iterableToArrayLimit(r, l) { var t = null == r ? null : "undefined" != typeof Symbol && r[Symbol.iterator] || r["@@iterator"]; if (null != t) { var e, n, i, u, a = [], f = !0, o = !1; try { if (i = (t = t.call(r)).next, 0 === l) { if (Object(t) !== t) return; f = !1; } else for (; !(f = (e = i.call(t)).done) && (a.push(e.value), a.length !== l); f = !0); } catch (r) { o = !0, n = r; } finally { try { if (!f && null != t["return"] && (u = t["return"](), Object(u) !== u)) return; } finally { if (o) throw n; } } return a; } }
  6. function _arrayWithHoles(r) { if (Array.isArray(r)) return r; }
  7. import assertString from './util/assertString';
  8. import checkHost from './util/checkHost';
  9. import includes from './util/includesString';
  10. import isFQDN from './isFQDN';
  11. import isIP from './isIP';
  12. import merge from './util/merge';
/*
options for isURL method

protocols - valid protocols can be modified with this option.
require_tld - if set to false, isURL will not check if the URL's host includes a top-level domain.
require_protocol - if set to true, isURL will return false if a protocol is not present in the URL.
require_host - if set to false, isURL will not check if a host is present in the URL.
require_port - if set to true, isURL will check if a port is present in the URL.
require_valid_protocol - isURL will check if the URL's protocol is present in the protocols option.
allow_underscores - if set to true, the validator will allow underscores in the URL.
host_whitelist - if set to an array of strings or regular expressions, and the domain matches none
of the entries defined in it, the validation fails.
host_blacklist - if set to an array of strings or regular expressions, and the domain matches any
of the entries defined in it, the validation fails.
allow_trailing_dot - if set to true, the validator will allow the domain to end with
a `.` character.
allow_protocol_relative_urls - if set to true, protocol-relative URLs will be allowed.
allow_fragments - if set to false, isURL will return false if fragments are present.
allow_query_components - if set to false, isURL will return false if query components are present.
disallow_auth - if set to true, the validator will fail if the URL contains an authentication
component, e.g. `http://username:password@example.com`
validate_length - if set to false, isURL will skip string length validation. `max_allowed_length`
will be ignored if this is set to `false`.
max_allowed_length - if set, isURL will not allow URLs longer than the specified value (default is
2084, which is the maximum URL length supported by IE).
*/
  38. var default_url_options = {
  39. protocols: ['http', 'https', 'ftp'],
  40. require_tld: true,
  41. require_protocol: false,
  42. require_host: true,
  43. require_port: false,
  44. require_valid_protocol: true,
  45. allow_underscores: false,
  46. allow_trailing_dot: false,
  47. allow_protocol_relative_urls: false,
  48. allow_fragments: true,
  49. allow_query_components: true,
  50. validate_length: true,
  51. max_allowed_length: 2084
  52. };
  53. var wrapped_ipv6 = /^\[([^\]]+)\](?::([0-9]+))?$/;
  54. export default function isURL(url, options) {
  55. assertString(url);
  56. if (!url || /[\s<>]/.test(url)) {
  57. return false;
  58. }
  59. if (url.indexOf('mailto:') === 0) {
  60. return false;
  61. }
  62. options = merge(options, default_url_options);
  63. if (options.validate_length && url.length > options.max_allowed_length) {
  64. return false;
  65. }
  66. if (!options.allow_fragments && includes(url, '#')) {
  67. return false;
  68. }
  69. if (!options.allow_query_components && (includes(url, '?') || includes(url, '&'))) {
  70. return false;
  71. }
  72. var protocol, auth, host, hostname, port, port_str, split, ipv6;
  73. split = url.split('#');
  74. url = split.shift();
  75. split = url.split('?');
  76. url = split.shift();
  77. // Replaced the 'split("://")' logic with a regex to match the protocol.
  78. // This correctly identifies schemes like `javascript:` which don't use `//`.
  79. // However, we need to be careful not to confuse authentication credentials (user:password@host)
  80. // with protocols. A colon before an @ symbol might be part of auth, not a protocol separator.
  81. var protocol_match = url.match(/^([a-z][a-z0-9+\-.]*):/i);
  82. var had_explicit_protocol = false;
  83. var cleanUpProtocol = function cleanUpProtocol(potential_protocol) {
  84. had_explicit_protocol = true;
  85. protocol = potential_protocol.toLowerCase();
  86. if (options.require_valid_protocol && options.protocols.indexOf(protocol) === -1) {
  87. // The identified protocol is not in the allowed list.
  88. return false;
  89. }
  90. // Remove the protocol from the URL string.
  91. return url.substring(protocol_match[0].length);
  92. };
  93. if (protocol_match) {
  94. var potential_protocol = protocol_match[1];
  95. var after_colon = url.substring(protocol_match[0].length);
  96. // Check if what follows looks like authentication credentials (user:password@host)
  97. // rather than a protocol. This happens when:
  98. // 1. There's no `//` after the colon (protocols like `http://` have this)
  99. // 2. There's an `@` symbol before any `/`
  100. // 3. The part before `@` contains only valid auth characters (alphanumeric, -, _, ., %, :)
  101. var starts_with_slashes = after_colon.slice(0, 2) === '//';
  102. if (!starts_with_slashes) {
  103. var first_slash_position = after_colon.indexOf('/');
  104. var before_slash = first_slash_position === -1 ? after_colon : after_colon.substring(0, first_slash_position);
  105. var at_position = before_slash.indexOf('@');
  106. if (at_position !== -1) {
  107. var before_at = before_slash.substring(0, at_position);
  108. var valid_auth_regex = /^[a-zA-Z0-9\-_.%:]*$/;
  109. var is_valid_auth = valid_auth_regex.test(before_at);
  110. // Check if this contains URL-encoded content that could be malicious
  111. // For example: javascript:%61%6c%65%72%74%28%31%29@example.com
  112. // The encoded part decodes to: alert(1)
  113. var has_encoded_content = /%[0-9a-fA-F]{2}/.test(before_at);
  114. if (is_valid_auth && !has_encoded_content) {
  115. // This looks like authentication (e.g., user:password@host), not a protocol
  116. if (options.require_protocol) {
  117. return false;
  118. }
  119. // Don't consume the colon; let the auth parsing handle it later
  120. } else {
  121. // This looks like a malicious protocol (e.g., javascript:alert();@host)
  122. // or URL-encoded protocol handler (e.g., javascript:%61%6c%65%72%74%28%31%29@host)
  123. url = cleanUpProtocol(potential_protocol);
  124. if (url === false) {
  125. return false;
  126. }
  127. }
  128. } else {
  129. // No @ symbol found. Check if this could be a port number instead of a protocol.
  130. // If what's after the colon is numeric (or starts with a digit and contains only
  131. // valid port characters until a path separator), it's likely hostname:port, not a protocol.
  132. var looks_like_port = /^[0-9]/.test(after_colon);
  133. if (looks_like_port) {
  134. // This looks like hostname:port, not a protocol
  135. if (options.require_protocol) {
  136. return false;
  137. }
  138. // Don't consume anything; let it be parsed as hostname:port
  139. } else {
  140. // This is definitely a protocol
  141. url = cleanUpProtocol(potential_protocol);
  142. if (url === false) {
  143. return false;
  144. }
  145. }
  146. }
  147. } else {
  148. // Starts with '//', this is definitely a protocol like http://
  149. url = cleanUpProtocol(potential_protocol);
  150. if (url === false) {
  151. return false;
  152. }
  153. }
  154. } else if (options.require_protocol) {
  155. return false;
  156. }
  157. // Handle leading '//' only as protocol-relative when there was NO explicit protocol.
  158. // If there was an explicit protocol, '//' is the normal separator
  159. // and should be stripped unconditionally.
  160. if (url.slice(0, 2) === '//') {
  161. if (!had_explicit_protocol && !options.allow_protocol_relative_urls) {
  162. return false;
  163. }
  164. url = url.slice(2);
  165. }
  166. if (url === '') {
  167. return false;
  168. }
  169. split = url.split('/');
  170. url = split.shift();
  171. if (url === '' && !options.require_host) {
  172. return true;
  173. }
  174. split = url.split('@');
  175. if (split.length > 1) {
  176. if (options.disallow_auth) {
  177. return false;
  178. }
  179. if (split[0] === '') {
  180. return false;
  181. }
  182. auth = split.shift();
  183. if (auth.indexOf(':') >= 0 && auth.split(':').length > 2) {
  184. return false;
  185. }
  186. var _auth$split = auth.split(':'),
  187. _auth$split2 = _slicedToArray(_auth$split, 2),
  188. user = _auth$split2[0],
  189. password = _auth$split2[1];
  190. if (user === '' && password === '') {
  191. return false;
  192. }
  193. }
  194. hostname = split.join('@');
  195. port_str = null;
  196. ipv6 = null;
  197. var ipv6_match = hostname.match(wrapped_ipv6);
  198. if (ipv6_match) {
  199. host = '';
  200. ipv6 = ipv6_match[1];
  201. port_str = ipv6_match[2] || null;
  202. } else {
  203. split = hostname.split(':');
  204. host = split.shift();
  205. if (split.length) {
  206. port_str = split.join(':');
  207. }
  208. }
  209. if (port_str !== null && port_str.length > 0) {
  210. port = parseInt(port_str, 10);
  211. if (!/^[0-9]+$/.test(port_str) || port <= 0 || port > 65535) {
  212. return false;
  213. }
  214. } else if (options.require_port) {
  215. return false;
  216. }
  217. if (options.host_whitelist) {
  218. return checkHost(host, options.host_whitelist);
  219. }
  220. if (host === '' && !options.require_host) {
  221. return true;
  222. }
  223. if (!isIP(host) && !isFQDN(host, options) && (!ipv6 || !isIP(ipv6, 6))) {
  224. return false;
  225. }
  226. host = host || ipv6;
  227. if (options.host_blacklist && checkHost(host, options.host_blacklist)) {
  228. return false;
  229. }
  230. return true;
  231. }