6 Commits

Author SHA1 Message Date
Kevin R
81d5b35fe2 Added tokenized provider lookup
Reduce lookup overhead by using map for provider matching
2025-12-07 22:10:16 +01:00
Kevin R
2f8403f1ce Removed comments 2025-12-07 21:50:02 +01:00
Kevin R
9cf1704100 Fix 2025-12-07 21:47:41 +01:00
Kevin R
dc23257632 Bump version 2025-12-07 21:37:03 +01:00
Kevin R
7c8d6b0eee Pre-compiled Regular Expressions
Previously, regular expressions were compiled inside loops for every URL processed.

Now RegExp objects are created once when rules are loaded and stored in the provider instance.
2025-12-07 21:36:15 +01:00
TrianguloY
6d904144ec Added URLCheck to 'projects that use...' in the readme 2025-07-05 14:04:14 +02:00
3 changed files with 130 additions and 85 deletions

View File

@@ -78,6 +78,7 @@ Please push your translation into the folder `_locales/{language code}/messages.
* [Unalix-nim](https://github.com/AmanoTeam/Unalix-nim) small, dependency-free, fast Nim package and CLI tool for removing tracking fields from URLs
* [UnalixAndroid](https://github.com/AmanoTeam/UnalixAndroid) simple Android app that removes link masking/tracking and optionally resolves shortened links
* [pl-fe](https://github.com/mkljczk/pl-fe) is a Fediverse client which uses ClearURLs code to clean URLs from displayed posts and recommend cleaning URLs from created posts
* [URLCheck](https://github.com/TrianguloY/URLCheck) is an Android app to review and edit URLs before opening them. It can use the ClearURLs catalog.
## Recommended by...
* [ghacks-user.js](https://github.com/ghacksuserjs/ghacks-user.js/wiki/4.1-Extensions)

View File

@@ -21,6 +21,8 @@
* This script is responsible for the core functionalities.
*/
var providers = [];
var providersByToken = {}; // Map<string, Provider[]>
var globalProviders = []; // Provider[]
var prvKeys = [];
var siteBlockedAlert = 'javascript:void(0)';
var dataHash;
@@ -89,14 +91,14 @@ function removeFieldsFormURL(provider, pureUrl, quiet = false, request = null) {
/*
* Apply raw rules to the URL.
*/
rawRules.forEach(function (rawRule) {
rawRules.forEach(function ({ rule: rawRuleStr, regex: rawRuleRegex }) {
let beforeReplace = url;
url = url.replace(new RegExp(rawRule, "gi"), "");
url = url.replace(rawRuleRegex, "");
if (beforeReplace !== url) {
//Log the action
if (storage.loggingStatus && !quiet) {
pushToLog(beforeReplace, url, rawRule);
pushToLog(beforeReplace, url, rawRuleStr);
}
increaseBadged(quiet, request);
@@ -113,13 +115,13 @@ function removeFieldsFormURL(provider, pureUrl, quiet = false, request = null) {
* Only test for matches, if there are fields or fragments that can be cleaned.
*/
if (fields.toString() !== "" || fragments.toString() !== "") {
rules.forEach(rule => {
rules.forEach(({ rule, regex }) => {
const beforeFields = fields.toString();
const beforeFragments = fragments.toString();
let localChange = false;
for (const field of fields.keys()) {
if (new RegExp("^"+rule+"$", "gi").test(field)) {
if (regex.test(field)) {
fields.delete(field);
changes = true;
localChange = true;
@@ -127,7 +129,7 @@ function removeFieldsFormURL(provider, pureUrl, quiet = false, request = null) {
}
for (const fragment of fragments.keys()) {
if (new RegExp("^"+rule+"$", "gi").test(fragment)) {
if (regex.test(fragment)) {
fragments.delete(fragment);
changes = true;
localChange = true;
@@ -228,6 +230,17 @@ function start() {
for (let re = 0; re < methods.length; re++) {
providers[p].addMethod(methods[re]);
}
// Indexing logic
const token = providers[p].getLookupToken();
if (token) {
if (!providersByToken[token]) {
providersByToken[token] = [];
}
providersByToken[token].push(providers[p]);
} else {
globalProviders.push(providers[p]);
}
}
}
@@ -360,7 +373,7 @@ function start() {
let methods = [];
if (_completeProvider) {
enabled_rules[".*"] = true;
enabled_rules[".*"] = new RegExp("^.*$", "i");
}
/**
@@ -379,6 +392,38 @@ function start() {
return name;
};
/**
 * Returns the lookup token for this provider, or null if the provider
 * must be treated as global (checked for every request).
 *
 * The token is one literal host label read from the *source text* of the
 * URL pattern. A token is only returned when the pattern has this shape:
 *
 *   ^https?:\/\/  [(?:[a-z0-9-]+\.)*?]  [www\.]  token  <label end>
 *
 * e.g. "^https?:\/\/(?:[a-z0-9-]+\.)*?amazon(?:\.[a-z]{2,}){1,}" -> "amazon"
 *      "^https?:\/\/vk\.com"                                     -> "vk"
 *
 * Anything else (alternations or character classes in the host, wildcards
 * that only occur in the path, unanchored patterns, ...) yields null.
 * Returning null is always safe; returning a token that is not a complete
 * host label would hide the provider from matching URLs.
 *
 * @return {String|null} lower-cased host label, or null if global
 */
this.getLookupToken = function () {
    if (!urlPattern) return null;

    // Every piece below matches regular-expression SOURCE TEXT, not URLs.

    // "^http://", "^https://" or "^https?://". The "?" after "https" is a
    // literal character of the source text and has to be consumed here.
    const scheme = /\^https?\??:\\?\/\\?\//;

    // Optional sub-domain wildcard, written exactly as "(?:[a-z0-9-]+\.)"
    // followed by "*?", "*" or "?". It cannot match "/" and always ends
    // on a dot, so the token that follows starts on a label boundary.
    const subdomains = /(?:\(\?:\[a-z0-9-\]\+\\\.\)(?:\*\??|\?))?/;

    // Optional literal "www." prefix; skipped so that the index key is
    // the more selective label that follows it.
    const www = /(?:www\\\.)?/;

    const token = /([a-z0-9-]+)/;

    // The label must end right after the token: an escaped dot, a path
    // slash, or a "(?:\. ... )" group. The terminator must not be made
    // optional by a following "*", "?" or "{0...}", otherwise the label
    // could continue (e.g. "foo\.?bar" also matches host "foobar").
    const labelEnd = /(?=(?:\\\.|\\?\/|\(\?:\\\.[^()]*\))(?![*?]|\{0))/;

    const matcher = new RegExp(
        "^" + scheme.source + subdomains.source + www.source + token.source + labelEnd.source,
        "i"
    );

    const found = urlPattern.source.match(matcher);
    return found ? found[1].toLowerCase() : null;
};
/**
* Add URL pattern.
*
@@ -406,25 +451,21 @@ function start() {
};
/**
* Apply a rule to a given tuple of rule array.
* @param enabledRuleArray array for enabled rules
* @param disabledRulesArray array for disabled rules
* @param {String} rule RegExp as string
* @param {boolean} isActive Is this rule active?
* Helper to update rule maps with compiled regexes.
*/
this.applyRule = (enabledRuleArray, disabledRulesArray, rule, isActive = true) => {
const updateRule = (enabledMap, disabledMap, rule, isActive, compileFn) => {
if (isActive) {
enabledRuleArray[rule] = true;
if (disabledRulesArray[rule] !== undefined) {
delete disabledRulesArray[rule];
if (!enabledMap[rule]) {
try {
enabledMap[rule] = compileFn(rule);
} catch (e) {
console.error("Invalid regex", rule, e);
}
}
if (disabledMap[rule]) delete disabledMap[rule];
} else {
disabledRulesArray[rule] = true;
if (enabledRuleArray[rule] !== undefined) {
delete enabledRuleArray[rule];
}
disabledMap[rule] = true;
if (enabledMap[rule]) delete enabledMap[rule];
}
};
@@ -436,20 +477,22 @@ function start() {
* @param {boolean} isActive Is this rule active?
*/
this.addRule = function (rule, isActive = true) {
this.applyRule(enabled_rules, disabled_rules, rule, isActive);
updateRule(enabled_rules, disabled_rules, rule, isActive, r => new RegExp("^" + r + "$", "i"));
};
/**
* Return all active rules as an array.
* Return all active rules as an array of {rule, regex}.
*
* @return Array RegExp strings
* @return Array Objects
*/
this.getRules = function () {
let source = enabled_rules;
if (!storage.referralMarketing) {
return Object.keys(Object.assign(enabled_rules, enabled_referralMarketing));
// storage.referralMarketing is not set, so the referral marketing rules are merged in.
// Merge into a fresh object so that Object.assign does not mutate enabled_rules.
source = Object.assign({}, enabled_rules, enabled_referralMarketing);
}
return Object.keys(enabled_rules);
return Object.entries(source).map(([rule, regex]) => ({ rule, regex }));
};
/**
@@ -460,16 +503,17 @@ function start() {
* @param {boolean} isActive Is this rule active?
*/
this.addRawRule = function (rule, isActive = true) {
this.applyRule(enabled_rawRules, disabled_rawRules, rule, isActive);
updateRule(enabled_rawRules, disabled_rawRules, rule, isActive, r => new RegExp(r, "gi"));
};
/**
* Return all active raw rules as an array.
*
* @return Array RegExp strings
* @return Array Objects {rule, regex}
*/
this.getRawRules = function () {
return Object.keys(enabled_rawRules);
// return Object.keys(enabled_rawRules);
return Object.entries(enabled_rawRules).map(([rule, regex]) => ({ rule, regex }));
};
/**
@@ -480,7 +524,7 @@ function start() {
* @param {boolean} isActive Is this rule active?
*/
this.addReferralMarketing = function (rule, isActive = true) {
this.applyRule(enabled_referralMarketing, disabled_referralMarketing, rule, isActive);
updateRule(enabled_referralMarketing, disabled_referralMarketing, rule, isActive, r => new RegExp("^" + r + "$", "i"));
};
/**
@@ -491,19 +535,7 @@ function start() {
* @param {Boolean} isActive Is this exception active?
*/
this.addException = function (exception, isActive = true) {
if (isActive) {
enabled_exceptions[exception] = true;
if (disabled_exceptions[exception] !== undefined) {
delete disabled_exceptions[exception];
}
} else {
disabled_exceptions[exception] = true;
if (enabled_exceptions[exception] !== undefined) {
delete enabled_exceptions[exception];
}
}
updateRule(enabled_exceptions, disabled_exceptions, exception, isActive, r => new RegExp(r, "i"));
};
/**
@@ -541,11 +573,9 @@ function start() {
//Add the site blocked alert to every exception
if (url === siteBlockedAlert) return true;
for (const exception in enabled_exceptions) {
for (const [exception, regex] of Object.entries(enabled_exceptions)) {
if (result) break;
let exception_regex = new RegExp(exception, "i");
result = exception_regex.test(url);
result = regex.test(url);
}
return result;
@@ -559,19 +589,7 @@ function start() {
* @param {Boolean} isActive Is this redirection active?
*/
this.addRedirection = function (redirection, isActive = true) {
if (isActive) {
enabled_redirections[redirection] = true;
if (disabled_redirections[redirection] !== undefined) {
delete disabled_redirections[redirection];
}
} else {
disabled_redirections[redirection] = true;
if (enabled_redirections[redirection] !== undefined) {
delete enabled_redirections[redirection];
}
}
updateRule(enabled_redirections, disabled_redirections, redirection, isActive, r => new RegExp(r, "i"));
};
/**
@@ -582,11 +600,11 @@ function start() {
this.getRedirection = function (url) {
let re = null;
for (const redirection in enabled_redirections) {
let result = (url.match(new RegExp(redirection, "i")));
for (const [redirection, regex] of Object.entries(enabled_redirections)) {
let result = url.match(regex);
if (result && result.length > 0 && redirection) {
re = (new RegExp(redirection, "i")).exec(url)[1];
re = result[1];
break;
}
@@ -623,16 +641,42 @@ function start() {
pushToLog(request.url, request.url, translate('log_ping_blocked'));
increaseBadged(false, request);
increaseTotalCounter(1);
return {cancel: true};
return { cancel: true };
}
let host = "";
try {
host = extractHost(new URL(request.url));
} catch (e) {
// If URL parsing fails, we fall back to an empty host, so only the global providers are considered
}
const hostTokens = host.split('.').map(t => t.toLowerCase());
// Collect candidate providers: Global + Key Matches
// Use a Set to avoid duplicates if multiple tokens map to same provider (unlikely but safe)
let candidateProviders = new Set(globalProviders);
for (const token of hostTokens) {
if (providersByToken[token]) {
for (const p of providersByToken[token]) {
candidateProviders.add(p);
}
}
}
// "providers" global var is still used for legacy, but here we iterate candidates
// Converting Set to Array for iteration
const candidates = Array.from(candidateProviders);
/*
* Call for every provider the removeFieldsFormURL method.
*/
for (let i = 0; i < providers.length; i++) {
if (!providers[i].matchMethod(request)) continue;
if (providers[i].matchURL(request.url)) {
result = removeFieldsFormURL(providers[i], request.url, false, request);
for (let i = 0; i < candidates.length; i++) {
const provider = candidates[i];
if (!provider.matchMethod(request)) continue;
if (provider.matchURL(request.url)) {
result = removeFieldsFormURL(provider, request.url, false, request);
}
/*
@@ -640,10 +684,10 @@ function start() {
* Cancel the active request.
*/
if (result.redirect) {
if (providers[i].shouldForceRedirect() &&
if (provider.shouldForceRedirect() &&
request.type === 'main_frame') {
browser.tabs.update(request.tabId, {url: result.url}).catch(handleError);
return {cancel: true};
browser.tabs.update(request.tabId, { url: result.url }).catch(handleError);
return { cancel: true };
}
return {
@@ -658,9 +702,9 @@ function start() {
if (result.cancel) {
if (request.type === 'main_frame') {
const blockingPage = browser.runtime.getURL("html/siteBlockedAlert.html?source=" + encodeURIComponent(request.url));
browser.tabs.update(request.tabId, {url: blockingPage}).catch(handleError);
browser.tabs.update(request.tabId, { url: blockingPage }).catch(handleError);
return {cancel: true};
return { cancel: true };
} else {
return {
redirectUrl: siteBlockedAlert
@@ -723,7 +767,7 @@ function start() {
*/
browser.webRequest.onBeforeRequest.addListener(
promise,
{urls: ["<all_urls>"], types: getData("types").concat(getData("pingRequestTypes"))},
{ urls: ["<all_urls>"], types: getData("types").concat(getData("pingRequestTypes")) },
["blocking"]
);
}

View File

@@ -1,7 +1,7 @@
{
"manifest_version": 2,
"name": "ClearURLs",
"version": "1.27.3",
"version": "1.28.0",
"author": "Kevin Roebert",
"description": "__MSG_extension_description__",
"homepage_url": "https://docs.clearurls.xyz",
@@ -270,18 +270,18 @@
"*://*.google.co.zw/*",
"*://*.google.cat/*"
],
"include_globs": [
"http?://www.google.*/",
"include_globs": [
"http?://www.google.*/",
"http?://www.google.*/#hl=*",
"http?://www.google.*/search*",
"http?://www.google.*/search*",
"http?://www.google.*/webhp?hl=*",
"https://encrypted.google.*/",
"https://encrypted.google.*/",
"https://encrypted.google.*/#hl=*",
"https://encrypted.google.*/search*",
"https://encrypted.google.*/search*",
"https://encrypted.google.*/webhp?hl=*",
"http?://ipv6.google.com/",
"http?://ipv6.google.com/",
"http?://ipv6.google.com/search*"
],
],
"js": [
"core_js/google_link_fix.js"
],
@@ -303,4 +303,4 @@
"options_ui": {
"page": "html/settings.html"
}
}
}