Anakunda / libStringDistance

// ==UserScript==
// ==UserLibrary==
// @name         libStringDistance
// @namespace    https://openuserjs.org/users/Anakunda
// @version      1.01
// @license      GPL-3.0-or-later
// @copyright    2021, Anakunda (https://openuserjs.org/users/Anakunda)
// @description  String Similarity Comparision
// @exclude      *
// ==/UserScript==
// ==/UserLibrary==

function hammingDistance(strA, strB) {
	if (typeof strA != 'string' || typeof strB != 'string') throw 'Invalid arguments';
	var d = 0, h = strA ^ strB;
	while (h > 0) { ++d; h &= h - 1; }
	return d;
}

function levenshteinDistance(strA, strB) {
	if (typeof strA != 'string' || typeof strB != 'string') throw 'Invalid arguments';
	if (strA.length <= 0) return strB.length;
	if (strB.length <= 0) return strA.length;
	var matrix = [];
	// increment along the first column of each row
	for (var i = 0; i <= strB.length; ++i) matrix[i] = [i];
	// increment each column in the first row
	for (var j = 0; j <= strA.length; ++j) matrix[0][j] = j;
	// Fill in the rest of the matrix
	for (i = 1; i <= strB.length; ++i) {
		for (j = 1; j <= strA.length; ++j) {
			matrix[i][j] = strB.charAt(i-1) == strA.charAt(j-1) ? matrix[i-1][j - 1]
				: Math.min(matrix[i - 1][j - 1] + 1, Math.min(matrix[i][j - 1] + 1, matrix[i - 1][j] + 1));
		}
	}
	return matrix[strB.length][strA.length];
}

function cosineSimilarity(strA, strB) {
	if (typeof strA != 'string' || typeof strB != 'string') throw 'Invalid arguments';

	function termFreqMap(str) {
		var words = str.split(' '), termFreq = {};
		words.forEach(w => { termFreq[w] = (termFreq[w] || 0) + 1 });
		return termFreq;
	}
	function addKeysToDict(map, dict) {
		for (let key in map) dict[key] = true;
	}
	function termFreqMapToVector(map, dict) {
		var termFreqVector = [];
		for (let term in dict) termFreqVector.push(map[term] || 0);
		return termFreqVector;
	}
	function vecDotProduct(vecA, vecB) {
		var product = 0;
		for (let i = 0; i < vecA.length; i++) product += vecA[i] * vecB[i];
		return product;
	}
	function vecMagnitude(vec) {
		var sum = 0;
		for (let i = 0; i < vec.length; ++i) sum += vec[i] * vec[i];
		return Math.sqrt(sum);
	}
	function _cosineSimilarity(vecA, vecB) {
		return vecDotProduct(vecA, vecB) / (vecMagnitude(vecA) * vecMagnitude(vecB));
	}

	var termFreqA = termFreqMap(strA), termFreqB = termFreqMap(strB), dict = {};
	addKeysToDict(termFreqA, dict);
	addKeysToDict(termFreqB, dict);
	var termFreqVecA = termFreqMapToVector(termFreqA, dict), termFreqVecB = termFreqMapToVector(termFreqB, dict);
	return _cosineSimilarity(termFreqVecA, termFreqVecB);
}

function jaroWrinkerSimilarity(strA, strB) {
	if (typeof strA != 'string' || typeof strB != 'string') throw 'Invalid arguments';
	// Exit early if either are empty.
	if (strA.length <= 0 || strB.length <= 0) return 0;
	// Exit early if they're an exact match.
	if (strA == strB) return 1;
	let m = 0, range = (Math.floor(Math.max(strA.length, strB.length) / 2)) - 1,
			s1Matches = new Array(strA.length), s2Matches = new Array(strB.length);
	for (let i = 0; i < strA.length; ++i) {
		let low = (i >= range) ? i - range : 0,
				high = (i + range <= strB.length) ? (i + range) : (strB.length - 1);
		for (let j = low; j <= high; ++j) {
			if (s1Matches[i] === true || s2Matches[j] === true || strA[i] !== strB[j]) continue;
			++m;
			s1Matches[i] = s2Matches[j] = true;
			break;
		}
	}
	// Exit early if no matches were found.
	if (m == 0) return 0;
	// Count the transpositions.
	let k = 0, n_trans = 0;
	for (let i = 0, j; i < strA.length; ++i) {
		if (s1Matches[i] !== true) continue;
		for (j = k; j < strB.length; ++j) {
			if (s2Matches[j] !== true) continue;
			k = j + 1;
			break;
		}
		if (strA[i] !== strB[j]) ++n_trans;
	}
	var weight = (m / strA.length + m / strB.length + (m - (n_trans / 2)) / m) / 3, l = 0, p = 0.1;
	if (weight > 0.7) {
		while (strA[l] === strB[l] && l < 4) ++l;
		weight += l * p * (1 - weight);
	}
	return weight;
}

function trigramIndex(inputPhrases) {
	function asTrigrams(phrase, callback) {
		var rawData = "  ".concat(phrase, "  ");
		for (var i = rawData.length - 3; i >= 0; i = i - 1) callback.call(this, rawData.slice(i, i + 3));
	};

	if (!Array.isArray(inputPhrases)) return null;
	var instance = {
		phrases: [],
		trigramIndex: [],

		index: function(phrase) {
			if (!phrase || this.phrases.includes(phrase)) return;
			var phraseIndex = this.phrases.push(phrase) - 1;
			asTrigrams.call(this, phrase, function (trigram) {
				var phrasesForTrigram = this.trigramIndex[trigram];
				if (!phrasesForTrigram) phrasesForTrigram = [];
				if (phrasesForTrigram.indexOf(phraseIndex) < 0) phrasesForTrigram.push(phraseIndex);
				this.trigramIndex[trigram] = phrasesForTrigram;
			});
		},

		find: function(phrase) {
			var phraseMatches = [], trigramsInPhrase = 0;
			asTrigrams.call(this, phrase, function(trigram) {
				var phrasesForTrigram = this.trigramIndex[trigram];
				trigramsInPhrase += 1;
				if (phrasesForTrigram) for (var j in phrasesForTrigram) {
					let phraseIndex = phrasesForTrigram[j];
					if (!phraseMatches[phraseIndex]) phraseMatches[phraseIndex] = 0;
					phraseMatches[phraseIndex] += 1;
				}
			});
			var result = [];
			for (var i in phraseMatches) result.push({ phrase: this.phrases[i], matches: phraseMatches[i] });
			result.sort((a, b) => b.matches - a.matches);
			return result;
		}
	};
	for (let i in inputPhrases) instance.index(inputPhrases[i]);
	return instance;
}
function trigramCompare(strA, strB) {
	if (typeof strA != 'string' || typeof strB != 'string') throw 'Invalid arguments';
	return trigramIndex([strA]).find(strB);
}