// fmm / RetailPath product analysis

// ==UserScript==
// @name         RetailPath product analysis
// @namespace    http://tampermonkey.net/
// @version      0.4.3
// @description  Analyse the positions order and derive more information
// @author       fmm
// @match        https://metcash.retailpath.com.au/*
// @license      MIT
// @updateURL    https://openuserjs.org/meta/fmm/RetailPath_product_analysis.meta.js
// ==/UserScript==


/**
 * Scans the page for table rows containing "Positions Order" and, for each
 * one, annotates the comparison shown in the following row: the second
 * `.textChange` cell (the "after" column) receives MOVED / EDITED / ADDED
 * badges and a trailing list of DELETED items.
 *
 * Rows without a following sibling, or whose following row has fewer than two
 * `.textChange` cells, are skipped instead of aborting the whole script.
 */
function go() {
    'use strict';

    const rows = document.getElementsByTagName("tr");
    const searchText = "Positions Order";

    for (let i = 0; i < rows.length; i++) {
        if (!rows[i].textContent.includes(searchText)) {
            continue;
        }

        // textContent also matches any row that merely contains the text
        // (e.g. a wrapper row), so make sure the expected layout is there.
        const nextRow = rows[i].nextElementSibling;
        if (!nextRow) {
            continue;
        }
        const columns = nextRow.getElementsByClassName('textChange');
        if (columns.length < 2) {
            continue;
        }

        annotatePositionsOrder(columns[0], columns[1]);
    }
}

/**
 * Compares the item lists of one before/after cell pair and writes the
 * resulting badges into the "after" cell.
 *
 * @param {Element} beforeCell - cell holding the previous positions order
 * @param {Element} afterCell - cell holding the new positions order (annotated in place)
 */
function annotatePositionsOrder(beforeCell, afterCell) {
    'use strict';

    const spanTags = /<\/?span[^>]*>/g;
    const strayBreaks = /<br>(?!\d+\s-)/g; // <br> that does not start a new "N - " item
    const varietiesPhrase = /\sselected varieties/ig;
    const hasVarietiesPhrase = /\sselected varieties/i; // non-global: safe to reuse with test()

    // Parse first: the parser needs the original markup.
    const columnBefore = parsePositionItems(beforeCell.innerHTML);
    const columnAfter = parsePositionItems(afterCell.innerHTML);

    // Replace the actual HTML
    beforeCell.innerHTML = beforeCell.innerHTML.replace(spanTags, "").replace(strayBreaks, "").trim();
    afterCell.innerHTML = afterCell.innerHTML.replace(spanTags, "").replace(strayBreaks, "").trim();

    // find moved items: same description, different position
    columnBefore.forEach(function (before) {
        columnAfter.forEach(function (after) {
            if (before.description === after.description && before.index !== after.index) {
                markPositionItem(afterCell, after, "MOVED", "DodgerBlue");
                before.status = 'moved';
                after.status = 'moved';
            }
        });
    });

    // find edited items: same position, similar (but not identical) description
    columnAfter.forEach(function (after) {
        columnBefore.forEach(function (before) {
            // Ignore the boilerplate phrase when both sides agree on having it
            // (or not having it); otherwise compare the full descriptions.
            const samePhraseUse = hasVarietiesPhrase.test(after.description) === hasVarietiesPhrase.test(before.description);
            const testValue = samePhraseUse
                ? testCosineSimilarity(after.description.replace(varietiesPhrase, ""), before.description.replace(varietiesPhrase, ""))
                : testCosineSimilarity(after.description, before.description);

            // only mark as edited if high score AND item hasn't moved
            if (testValue >= 0.6 && testValue < 0.999 && after.status !== 'moved' && before.index === after.index) {
                markPositionItem(afterCell, after, "EDITED", "Darkorange");
                before.status = 'edited';
                after.status = 'edited';
            }
        });
    });

    // find new items: present after, absent before, and not an edit
    columnAfter.forEach(function (after) {
        const existedBefore = columnBefore.some(el => el.description === after.description);
        if (!existedBefore && after.status !== 'edited') {
            markPositionItem(afterCell, after, "ADDED", "MediumSeaGreen");
        }
    });

    // find deleted items: present before, absent after, and not an edit
    columnBefore.forEach(function (before) {
        const stillPresent = columnAfter.some(el => el.description === before.description);
        if (!stillPresent && before.status !== 'edited') {
            // Use the unescaped description so no regex backslashes are shown.
            afterCell.innerHTML += "<br>" + positionBadge("DELETED", "red") + " " + before.description;
        }
    });
}

/**
 * Splits a cell's HTML into its "N - description" items.
 *
 * @param {string} html - innerHTML of a positions cell
 * @returns {Array<{index: number, description: string, pattern: string, status: (string|undefined)}>}
 *     one entry per item that starts with a number; `pattern` is the
 *     regex-escaped description used to locate the item in the cell HTML
 */
function parsePositionItems(html) {
    'use strict';

    // Asterisks are removed from the raw data before comparing
    const rawItems = html.replace(/<\/?span[^>]*>/g, "").replace(/\*+/g, "").trim().split(/<br>(?=\d+\s-)/);
    const items = [];

    rawItems.forEach(function (raw) {
        // Extra <br> and newlines produce pieces without a leading number; skip those
        const indexMatch = raw.match(/^\d+/);
        if (indexMatch === null) {
            return;
        }

        const description = raw
            .replace(/^\d+\s-\s/, "") // drop only the leading "N - " prefix
            .replace(/<br>|\n|\r/g, " ") // remove any inner <br> and newline chars
            .replace(/\s+$/g, ""); // and trailing whitespace

        items.push({
            index: parseInt(indexMatch[0], 10),
            description: description,
            pattern: escapeRegex(description),
            status: undefined
        });
    });

    return items;
}

/**
 * Prefixes every occurrence of an item's description in the cell with a badge.
 *
 * @param {Element} cell - cell whose innerHTML is rewritten
 * @param {{pattern: string}} item - parsed item to locate
 * @param {string} label - badge text
 * @param {string} color - CSS background colour of the badge
 */
function markPositionItem(cell, item, label, color) {
    'use strict';

    // An empty pattern would match at every position of the HTML.
    if (item.pattern === "") {
        return;
    }
    cell.innerHTML = cell.innerHTML.replace(new RegExp(item.pattern, "g"), positionBadge(label, color) + " $&");
}

/**
 * Builds the HTML for a status badge.
 *
 * @param {string} label - badge text
 * @param {string} color - CSS background colour
 * @returns {string} span markup
 */
function positionBadge(label, color) {
    'use strict';

    return "<span style='letter-spacing: 0.05em; font-weight: bold; color: white; background-color: " + color + "; padding: 1px 3px;'>" + label + "</span>";
}

// UTILITY FUNCTIONS

/**
 * Backslash-escapes regular-expression metacharacters so the text can be
 * embedded in a RegExp and matched literally.
 *
 * @param {string} string - text to escape
 * @returns {string} the text with each metacharacter preceded by a backslash
 */
function escapeRegex(string) {
    var metaCharacters = /[-\/\\^$*+?.()|[\]{}]/g;
    var escapedMatch = '\\$&'; // a backslash followed by the matched character
    return string.replace(metaCharacters, escapedMatch);
}

// Cosine-Similarity.js
// https://medium.com/@sumn2u/string-similarity-comparision-in-js-with-examples-4bae35f13968
/**
 * Counts how often each space-separated word occurs in a string.
 *
 * The result has no prototype, so words such as "constructor", "toString" or
 * "__proto__" are counted like any other word instead of colliding with
 * members inherited from Object.prototype.
 *
 * @param {string} str - text to tokenise (split on single spaces)
 * @returns {Object<string, number>} prototype-less map of word -> count
 */
function termFreqMap(str) {
    var termFreq = Object.create(null);
    for (const word of str.split(' ')) {
        termFreq[word] = (termFreq[word] || 0) + 1;
    }
    return termFreq;
}

/**
 * Registers every enumerable key of `map` in `dict` (value `true`).
 * `dict` is modified in place; nothing is returned.
 *
 * @param {Object} map - object whose keys are collected
 * @param {Object} dict - accumulator receiving the keys
 */
function addKeysToDict(map, dict) {
    var term;
    for (term in map) {
        dict[term] = true;
    }
}

/**
 * Turns a term-frequency map into a numeric vector ordered by the keys of `dict`.
 *
 * Only own properties of `map` are read, so a term such as "toString" that is
 * missing from `map` yields 0 rather than a member inherited from
 * Object.prototype.
 *
 * @param {Object<string, number>} map - word -> count
 * @param {Object} dict - object whose enumerable keys define the vector's dimensions
 * @returns {number[]} counts in `dict` key order, 0 for terms absent from `map`
 */
function termFreqMapToVector(map, dict) {
    var hasOwn = Object.prototype.hasOwnProperty;
    var termFreqVector = [];
    for (var term in dict) {
        termFreqVector.push((hasOwn.call(map, term) && map[term]) || 0);
    }
    return termFreqVector;
}

/**
 * Dot product of two numeric vectors, iterating over the positions of `vecA`.
 *
 * @param {number[]} vecA - first vector (its length drives the iteration)
 * @param {number[]} vecB - second vector, read at the same positions
 * @returns {number} sum of the pairwise products
 */
function vecDotProduct(vecA, vecB) {
    let total = 0;
    for (const [position, component] of vecA.entries()) {
        total += component * vecB[position];
    }
    return total;
}

/**
 * Euclidean length of a numeric vector.
 *
 * @param {number[]} vec - vector components
 * @returns {number} square root of the sum of squared components
 */
function vecMagnitude(vec) {
    let sumOfSquares = 0;
    for (const component of vec) {
        sumOfSquares += component * component;
    }
    return Math.sqrt(sumOfSquares);
}

/**
 * Cosine similarity of two numeric vectors.
 *
 * @param {number[]} vecA - first vector
 * @param {number[]} vecB - second vector
 * @returns {number} dot(vecA, vecB) / (|vecA| * |vecB|), or 0 when either
 *     vector has zero magnitude (the quotient would otherwise be NaN)
 */
function cosineSimilarity(vecA, vecB) {
    const magnitudeProduct = vecMagnitude(vecA) * vecMagnitude(vecB);
    if (magnitudeProduct === 0) {
        // Empty or all-zero vector: similarity is undefined, report "no similarity".
        return 0;
    }
    return vecDotProduct(vecA, vecB) / magnitudeProduct;
}

/**
 * Cosine similarity of two strings based on their space-separated word counts.
 *
 * Declared as a regular function: the previous form assigned to an undeclared
 * identifier, which creates an implicit global and throws a ReferenceError in
 * strict-mode code and ES modules.
 *
 * @param {string} strA - first text
 * @param {string} strB - second text
 * @returns {number} similarity score as computed by cosineSimilarity
 */
function testCosineSimilarity(strA, strB) {
    var termFreqA = termFreqMap(strA);
    var termFreqB = termFreqMap(strB);

    // Prototype-less so that a word like "__proto__" is kept as a normal key.
    var dict = Object.create(null);
    addKeysToDict(termFreqA, dict);
    addKeysToDict(termFreqB, dict);

    var termFreqVecA = termFreqMapToVector(termFreqA, dict);
    var termFreqVecB = termFreqMapToVector(termFreqB, dict);

    return cosineSimilarity(termFreqVecA, termFreqVecB);
}

// Jaro-Wrinker.js
/**
 * Jaro-Winkler similarity of two strings.
 *
 * Declared as a regular function with local loop variables: the previous form
 * assigned to the undeclared identifiers `JaroWrinker`, `i`, `j` and `n_trans`,
 * which leaks globals and throws in strict-mode code and ES modules.
 *
 * @param {string} s1 - first string
 * @param {string} s2 - second string
 * @returns {number} 0 when either string is empty or no characters match,
 *     1 for identical strings, otherwise the Jaro score boosted by up to four
 *     characters of common prefix when the Jaro score exceeds 0.7
 */
function JaroWrinker(s1, s2) {
    // Exit early if either are empty.
    if (s1.length === 0 || s2.length === 0) {
        return 0;
    }

    // Exit early if they're an exact match.
    if (s1 === s2) {
        return 1;
    }

    var m = 0;
    var range = Math.floor(Math.max(s1.length, s2.length) / 2) - 1;
    var s1Matches = new Array(s1.length);
    var s2Matches = new Array(s2.length);
    var i;
    var j;

    for (i = 0; i < s1.length; i++) {
        var low = (i >= range) ? i - range : 0;
        // Clamp to the last valid index of s2.
        var high = Math.min(i + range, s2.length - 1);

        for (j = low; j <= high; j++) {
            if (s1Matches[i] !== true && s2Matches[j] !== true && s1[i] === s2[j]) {
                ++m;
                s1Matches[i] = s2Matches[j] = true;
                break;
            }
        }
    }

    // Exit early if no matches were found.
    if (m === 0) {
        return 0;
    }

    // Count the transpositions.
    var k = 0;
    var nTrans = 0;

    for (i = 0; i < s1.length; i++) {
        if (s1Matches[i] === true) {
            // Advance to the next matched character of s2; j is read after the loop.
            for (j = k; j < s2.length; j++) {
                if (s2Matches[j] === true) {
                    k = j + 1;
                    break;
                }
            }

            if (s1[i] !== s2[j]) {
                ++nTrans;
            }
        }
    }

    var weight = (m / s1.length + m / s2.length + (m - (nTrans / 2)) / m) / 3;
    var l = 0;
    var p = 0.1;

    // Winkler adjustment: reward a shared prefix of up to four characters.
    if (weight > 0.7) {
        while (s1[l] === s2[l] && l < 4) {
            ++l;
        }

        weight = weight + l * p * (1 - weight);
    }

    return weight;
}

// GO
// Entry point: run the analysis once. go() scans the <tr> elements that exist
// in the document at the moment of this call.
go();