Skip to content

Commit

Permalink
Merge pull request #236 from jshemas/isUrl
Browse files Browse the repository at this point in the history
replace validator with internal version of isUrl
  • Loading branch information
jshemas authored Jul 22, 2024
2 parents c6bb642 + edef1f3 commit 6de7db1
Show file tree
Hide file tree
Showing 7 changed files with 426 additions and 96 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Change Log

## 6.7.0

- Replace `validator` with an internal version of `isUrl` so we have better control over how it works.
- Fix issue where `JSON` parsing fails when YouTube escapes '&' as '\x26'.
- Updating dependencies

## 6.6.3

- Fix issue with the `charset` fallback. Replace Buffer.from with Uint8Array since body is always html
Expand Down
315 changes: 315 additions & 0 deletions lib/isUrl.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,315 @@
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-nocheck
/* eslint-disable @typescript-eslint/no-unsafe-argument */
/* eslint-disable @typescript-eslint/no-unsafe-call */
/* eslint-disable @typescript-eslint/no-unsafe-member-access */
/* eslint-disable @typescript-eslint/no-unsafe-assignment */
// This is from https://github.com/validatorjs/validator.js version: 13.12.0

// https://github.com/validatorjs/validator.js/blob/master/src/lib/util/assertString.js
/**
 * Guard that throws unless `input` is a string primitive or a `String` wrapper object.
 *
 * The error message names what was received instead: `null`, the `typeof` result for
 * primitives and functions, or the constructor name for objects.
 *
 * @param input - Value to check.
 * @throws TypeError when `input` is not a string.
 */
function assertString(input) {
  if (typeof input === 'string' || input instanceof String) {
    return;
  }

  let invalidType: string = typeof input;
  if (input === null) {
    invalidType = 'null';
  } else if (invalidType === 'object') {
    // Objects without a constructor (e.g. Object.create(null)) would otherwise crash here
    // with an unrelated "cannot read properties of undefined" error; fall back to 'object'.
    const constructorName = input.constructor && input.constructor.name;
    invalidType = typeof constructorName === 'string' && constructorName !== ''
      ? constructorName
      : 'object';
  }

  throw new TypeError(`Expected a string but received a ${invalidType}`);
}

// https://github.com/validatorjs/validator.js/blob/master/src/lib/util/merge.js
/**
 * Combine caller-supplied options with defaults.
 *
 * Returns a new object holding every own enumerable property of `obj`, plus each key of
 * `defaults` whose value on `obj` is `undefined`. Neither argument is modified, so a
 * settings object shared between calls never accumulates defaults it did not ask for.
 *
 * @param obj - Caller options; `undefined` or `null` is treated as "no options".
 * @param defaults - Fallback values, applied per key.
 * @returns A fresh object with the merged options.
 */
// eslint-disable-next-line @typescript-eslint/default-param-last
function merge(obj = { }, defaults) {
  // `obj` can still be null here (the default parameter only covers undefined).
  const source = obj === null ? {} : obj;
  // Shallow copy so the caller's object is left untouched.
  const merged: Record<string, unknown> = { ...source };

  // eslint-disable-next-line no-restricted-syntax
  for (const key in defaults) {
    // Read from the original object so values reachable through its prototype still win.
    if (typeof source[key] === 'undefined') {
      merged[key] = defaults[key];
    } else {
      merged[key] = source[key];
    }
  }
  return merged;
}

// https://github.com/validatorjs/validator.js/blob/master/src/lib/isFQDN.js
/**
 * Default settings for `isFQDN`. Any option the caller leaves undefined is
 * filled in from this object via `merge`.
 */
const defaultFqdnOptions = {
// Require at least two dot-separated labels and validate the last one as a TLD.
require_tld: true,
// Permit '_' inside labels.
allow_underscores: false,
// Strip one trailing '.' from the name before validating it.
allow_trailing_dot: false,
// Skip the TLD pattern check and accept a last label made up only of digits.
allow_numeric_tld: false,
// Strip a leading '*.' from the name before validating it.
allow_wildcard: false,
// Skip the 63-character limit on individual labels.
ignore_max_length: false,
};

/**
 * Decide whether `str` is a fully qualified domain name.
 *
 * The name is split on '.' and every label is validated; the last label is
 * additionally checked as a TLD when `require_tld` is on.
 *
 * @param str - Host name to validate; a non-string makes `assertString` throw.
 * @param options - Optional overrides for `defaultFqdnOptions`.
 * @returns `true` when the name passes every enabled check.
 */
function isFQDN(str, options) {
  assertString(str);
  options = merge(options, defaultFqdnOptions);

  let hostname = str;

  // An optional trailing dot is dropped before any other check.
  if (options.allow_trailing_dot && hostname.endsWith('.')) {
    hostname = hostname.slice(0, -1);
  }

  // Likewise an optional leading wildcard label.
  if (options.allow_wildcard === true && hostname.startsWith('*.')) {
    hostname = hostname.slice(2);
  }

  const labels = hostname.split('.');
  const lastLabel = labels[labels.length - 1];

  if (options.require_tld) {
    // A single label cannot carry a TLD.
    if (labels.length < 2) return false;

    const tldPattern = /^([a-z\u00A1-\u00A8\u00AA-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]{2,}|xn[a-z0-9-]{2,})$/i;
    if (!options.allow_numeric_tld && !tldPattern.test(lastLabel)) return false;

    // Whitespace is never allowed in the TLD.
    if (/\s/.test(lastLabel)) return false;
  }

  // All-digit TLDs are refused unless explicitly allowed.
  if (!options.allow_numeric_tld && /^\d+$/.test(lastLabel)) return false;

  const isValidLabel = (label) => {
    if (!options.ignore_max_length && label.length > 63) return false;

    const usesAllowedChars = /^[a-z_\u00a1-\uffff0-9-]+$/i.test(label);
    // Full-width characters fall inside the allowed range above, so exclude them separately.
    const hasFullWidthChar = /[\uff01-\uff5e]/.test(label);
    const hyphenAtEdge = /^-|-$/.test(label);
    const forbiddenUnderscore = !options.allow_underscores && /_/.test(label);

    return usesAllowedChars && !hasFullWidthChar && !hyphenAtEdge && !forbiddenUnderscore;
  };

  return labels.every(isValidLabel);
}

// https://github.com/validatorjs/validator.js/blob/master/src/lib/isIP.js
// One IPv4 octet: a decimal value from 0 to 255 written without leading zeros.
const IPv4SegmentFormat = '(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])';
// Four octets joined by dots. Left unanchored so it can be embedded in the IPv6 pattern below.
const IPv4AddressFormat = `(${IPv4SegmentFormat}[.]){3}${IPv4SegmentFormat}`;
// Anchored form used to test a whole string as an IPv4 address.
const IPv4AddressRegExp = new RegExp(`^${IPv4AddressFormat}$`);

// One IPv6 group: 1 to 4 hexadecimal digits.
const IPv6SegmentFormat = '(?:[0-9a-fA-F]{1,4})';
// Whole-string IPv6 matcher. Each alternative starts with a fixed number of "group:"
// prefixes (7 down to 1, then none) followed by the tails permitted after that many
// groups: further ":group" pieces, a bare ":", or an embedded IPv4 address.
// An optional "%..." suffix may follow (presumably a zone identifier).
const IPv6AddressRegExp = new RegExp('^('
+ `(?:${IPv6SegmentFormat}:){7}(?:${IPv6SegmentFormat}|:)|`
+ `(?:${IPv6SegmentFormat}:){6}(?:${IPv4AddressFormat}|:${IPv6SegmentFormat}|:)|`
+ `(?:${IPv6SegmentFormat}:){5}(?::${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,2}|:)|`
+ `(?:${IPv6SegmentFormat}:){4}(?:(:${IPv6SegmentFormat}){0,1}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,3}|:)|`
+ `(?:${IPv6SegmentFormat}:){3}(?:(:${IPv6SegmentFormat}){0,2}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,4}|:)|`
+ `(?:${IPv6SegmentFormat}:){2}(?:(:${IPv6SegmentFormat}){0,3}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,5}|:)|`
+ `(?:${IPv6SegmentFormat}:){1}(?:(:${IPv6SegmentFormat}){0,4}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,6}|:)|`
+ `(?::((?::${IPv6SegmentFormat}){0,5}:${IPv4AddressFormat}|(?::${IPv6SegmentFormat}){1,7}|:))`
+ ')(%[0-9a-zA-Z-.:]{1,})?$');

/**
 * Test whether `str` is an IP address.
 *
 * @param str - Candidate address; a non-string makes `assertString` throw.
 * @param version - `4` or `6` (number or string) to require that family. When omitted
 *   or empty, either family is accepted. Any other value yields `false`.
 * @returns `true` when `str` matches the requested address family.
 */
function isIP(str, version = '') {
  assertString(str);

  switch (String(version)) {
    case '':
      // No family requested: an address of either kind is acceptable.
      return IPv4AddressRegExp.test(str) || IPv6AddressRegExp.test(str);
    case '4':
      return IPv4AddressRegExp.test(str);
    case '6':
      return IPv6AddressRegExp.test(str);
    default:
      return false;
  }
}

// https://github.com/validatorjs/validator.js/blob/master/src/lib/isURL.js
/*
options for isURL method
require_protocol - if set as true isURL will return false if protocol is not present in the URL
require_valid_protocol - isURL will check if the URL's protocol is present in the protocols option
protocols - valid protocols can be modified with this option
require_host - if set as false isURL will not check if host is present in the URL
require_port - if set as true isURL will check if port is present in the URL
allow_protocol_relative_urls - if set as true protocol relative URLs will be allowed
validate_length - if set as false isURL will skip string length validation (IE maximum is 2083)
*/
/**
 * Default settings for `isURL`. Any option the caller leaves undefined is filled
 * in from this object via `merge`; the merged object is also handed to `isFQDN`.
 */
const defaultUrlOptions = {
// Protocols accepted when require_valid_protocol is on.
protocols: ['http', 'https', 'ftp'],
// Forwarded to isFQDN: the host must end in a valid TLD.
require_tld: true,
// Reject URLs that have no '://' protocol prefix.
require_protocol: false,
// When false, a URL with an empty host part is accepted.
require_host: true,
// Reject URLs that carry no port.
require_port: false,
// Check an explicit protocol against the `protocols` list.
require_valid_protocol: true,
// Forwarded to isFQDN: permit '_' in host labels.
allow_underscores: false,
// Forwarded to isFQDN: permit a trailing '.' on the host.
allow_trailing_dot: false,
// Accept URLs that start with '//' and carry no protocol.
allow_protocol_relative_urls: false,
// When false, any '#' in the URL is a failure.
allow_fragments: true,
// When false, any '?' or '&' in the URL is a failure.
allow_query_components: true,
// Reject URLs of 2083 characters or more.
validate_length: true,
};

// Matches a host written as "[...]" with an optional ":digits" suffix.
// Capture 1 is the text inside the brackets, capture 2 the digits after the colon.
const wrappedIpv6 = /^\[([^\]]+)\](?::([0-9]+))?$/;

/**
 * Report whether `obj` is a RegExp, judged by its `Object.prototype.toString` tag.
 *
 * @param obj - Any value.
 * @returns `true` when the tag is `[object RegExp]`.
 */
function isRegExp(obj) {
  return Object.prototype.toString.call(obj) === '[object RegExp]';
}

/**
 * Test a host against a list of entries, each either an exact value or a RegExp.
 *
 * @param host - Host to look up.
 * @param matches - Entries to compare against; non-RegExp entries use strict equality.
 * @returns `true` as soon as one entry matches, `false` when none do.
 */
function checkHost(host, matches) {
  // eslint-disable-next-line @typescript-eslint/prefer-for-of
  for (let i = 0; i < matches.length; i += 1) {
    const match = matches[i];
    if (host === match) {
      return true;
    }
    if (isRegExp(match)) {
      // Global and sticky patterns resume from `lastIndex`, so reusing the same RegExp
      // across calls would make the result depend on the previous call. Start from 0.
      if (match.global || match.sticky) {
        match.lastIndex = 0;
      }
      if (match.test(host)) {
        return true;
      }
    }
  }
  return false;
}

/**
 * Validate that `url` is an acceptable URL under the supplied options.
 *
 * The string is taken apart step by step (fragment, query string, protocol, path,
 * auth, then host and port) and each piece is checked as it is removed.
 *
 * @param url - Candidate URL. A non-string value makes `assertString` throw.
 * @param options - Optional overrides; keys left undefined are filled in from
 *   `defaultUrlOptions`. Besides those defaults the body also reads `disallow_auth`,
 *   `host_whitelist` and `host_blacklist`, and the merged object is forwarded to
 *   `isFQDN` for host name validation.
 * @returns `true` when every enabled check passes, otherwise `false`.
 * @throws TypeError when `url` is not a string.
 */
export default function isURL(url, options) {
assertString(url);
// Empty input, or any whitespace or angle bracket anywhere in the string, is rejected outright.
if (!url || /[\s<>]/.test(url)) {
return false;
}
// mailto: links are never accepted; this check runs before any option is consulted.
if (url.indexOf('mailto:') === 0) {
return false;
}
options = merge(options, defaultUrlOptions);

// Length cap: strings of 2083 characters or more fail unless validate_length is off.
if (options.validate_length && url.length >= 2083) {
return false;
}

if (!options.allow_fragments && url.includes('#')) {
return false;
}

// With query components disallowed, any '?' or '&' in the string is a failure.
if (!options.allow_query_components && (url.includes('?') || url.includes('&'))) {
return false;
}

// Working variables for the pieces split off the URL below.
// eslint-disable-next-line @typescript-eslint/naming-convention
let protocol; let auth; let host; let port; let port_str; let split; let
ipv6;

// Drop the fragment (everything from the first '#').
split = url.split('#');
url = split.shift();

// Drop the query string (everything from the first '?').
split = url.split('?');
url = split.shift();

// A '://' separator means an explicit protocol is present.
split = url.split('://');
if (split.length > 1) {
protocol = split.shift().toLowerCase();
if (options.require_valid_protocol && options.protocols.indexOf(protocol) === -1) {
return false;
}
} else if (options.require_protocol) {
return false;
} else if (url.slice(0, 2) === '//') {
// Protocol-relative form ("//host/..."): accepted only when explicitly allowed.
if (!options.allow_protocol_relative_urls) {
return false;
}
split[0] = url.slice(2);
}
// Re-join the remainder in case '://' appeared again after the protocol.
url = split.join('://');

if (url === '') {
return false;
}

// Keep only the part before the first '/' (auth, host and port); the path is discarded unchecked.
split = url.split('/');
url = split.shift();

// Nothing before the first '/' is acceptable when a host is optional.
if (url === '' && !options.require_host) {
return true;
}

// Anything before the first '@' is treated as auth (user[:password]).
split = url.split('@');
if (split.length > 1) {
if (options.disallow_auth) {
return false;
}
if (split[0] === '') {
return false;
}
auth = split.shift();
// More than one ':' inside the auth part is invalid.
if (auth.indexOf(':') >= 0 && auth.split(':').length > 2) {
return false;
}
const [user, password] = auth.split(':');
// Reject an auth part that is just ":" (empty user and empty password).
if (user === '' && password === '') {
return false;
}
}
// Whatever follows the first '@' (re-joined if more '@' appeared) is the host[:port] part.
const hostname = split.join('@');

port_str = null;
ipv6 = null;
// A bracketed literal such as "[::1]:8080" is captured as address plus optional port;
// `host` is left empty in that case.
// eslint-disable-next-line @typescript-eslint/naming-convention
const ipv6_match = hostname.match(wrappedIpv6);
if (ipv6_match) {
host = '';
// eslint-disable-next-line prefer-destructuring
ipv6 = ipv6_match[1];
port_str = ipv6_match[2] || null;
} else {
// Otherwise the text before the first ':' is the host and the rest is the port.
split = hostname.split(':');
host = split.shift();
if (split.length) {
port_str = split.join(':');
}
}

// A non-empty port must be all digits and fall within 1-65535.
if (port_str !== null && port_str.length > 0) {
port = parseInt(port_str, 10);
if (!/^[0-9]+$/.test(port_str) || port <= 0 || port > 65535) {
return false;
}
} else if (options.require_port) {
return false;
}

// When a whitelist is supplied it alone decides the result; the IP/FQDN checks below are skipped.
// NOTE(review): for a bracketed IPv6 URL `host` is '' at this point, so the whitelist is
// compared against '' rather than the address - confirm this is intended.
if (options.host_whitelist) {
return checkHost(host, options.host_whitelist);
}

if (host === '' && !options.require_host) {
return true;
}

// The host must be an IP address or a valid FQDN, or the bracketed literal must be a valid IPv6 address.
if (!isIP(host) && !isFQDN(host, options) && (!ipv6 || !isIP(ipv6, 6))) {
return false;
}

// For bracketed IPv6 URLs the address itself is what gets compared against the blacklist.
host = host || ipv6;

if (options.host_blacklist && checkHost(host, options.host_blacklist)) {
return false;
}

return true;
}
4 changes: 2 additions & 2 deletions lib/utils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import validator from 'validator';
import isUrl from './isUrl';
import type {
CustomMetaTags,
OgObjectInteral,
Expand Down Expand Up @@ -30,7 +30,7 @@ export const defaultUrlValidatorSettings = {
*
*/
export function isUrlValid(url: string, urlValidatorSettings: ValidatorSettings): boolean {
return typeof url === 'string' && url.length > 0 && validator.isURL(url, urlValidatorSettings);
return typeof url === 'string' && url.length > 0 && isUrl(url, urlValidatorSettings);
}

/**
Expand Down
Loading

0 comments on commit 6de7db1

Please sign in to comment.