// jscher2000 / Google SERP Keyword Highlighter

// ==UserScript==
// @name        Google SERP Keyword Highlighter
// @author      Jefferson "jscher2000" Scher
// @namespace   JeffersonScher
// @copyright   Copyright 2016 Jefferson Scher
// @license     BSD 3-clause
// @include     http*://www.google.*/*
// @include     http*://encrypted.google.*/*
// @include     http*://yandex.ru/*
// @include     http*://nigma.ru/*
// @include     http*://www.nigma.ru/*
// @description Highlight your query terms on the Google, Yandex.ru and Nigma.ru results page. BETA v0.7.1 2016-12-11
// @version     0.7.1
// @grant       none
// @updateURL https://openuserjs.org/meta/jscher2000/Google_SERP_Keyword_Highlighter.meta.js
// ==/UserScript==
/* Contains code from Text Highlight and Seek by erosman and Jefferson "jscher2000" Scher
   DISCLAIMER:     Use at your own risk. Functionality and harmlessness cannot be guaranteed.
*/
(function() { // anonymous function wrapper, used for error checking & limiting scope
  'use strict';

  // Highlight palette: one background color per keyword slot, 25 slots in total.
  // As shipped, every slot is forced to bright yellow by the switch below.
  var yellowonly = true; // true = overwrite the whole palette with bright yellow
  var backcolors = [
    "rgb(255,255,128)", "rgb(255,128,255)", "rgb(128,255,255)", "rgb(255,128,128)", "rgb(128,255,128)",
    "rgb(128,128,255)", "rgb(128,255,0)", "rgb(255,128,0)", "rgb(0,128,255)", "rgb(0,255,128)",
    "rgb(255,0,128)", "rgb(128,0,255)", "rgb(255,255,0)", "rgb(255,255,0)", "rgb(255,255,0)",
    "rgb(255,255,0)", "rgb(255,255,0)", "rgb(255,255,0)", "rgb(255,255,0)", "rgb(255,255,0)",
    "rgb(255,255,0)", "rgb(255,255,0)", "rgb(255,255,0)", "rgb(255,255,0)", "rgb(255,0,0)"
  ];
  if (yellowonly){
    backcolors.forEach(function(ignored, slot){
      backcolors[slot] = "rgb(255,255,0)";
    });
  }

  // Query-parsing switches
  // parsephrases: a quoted "potato chips" is highlighted as one phrase, not as potato and chips separately
  var parsephrases = true;
  // hyphenoptions: user-script also matches userscript and user script
  var hyphenoptions = true;

  // TODO: stemming, Google modified your query, many other special cases...

  // do not run in any framed pages in this version
  var hlframe = "none";
  if ((window.self !== window.top) && (hlframe != "any")) { // framed page
    if (hlframe == "none") return; 
    if (hlframe == "same") {
      console.log(window.self.location.hostname + " vs " + window.top.location.hostname);
    }
  }
  
  // store query
  /**
   * Extract the search query from the current page URL.
   *
   * The text after the first "?" is split on "&" and scanned from the last
   * segment to the first. Recognized sources, depending on the hostname:
   *   - Google: a "#q=" fragment (wins over a "q=" parameter in the same
   *     segment, since it represents the later query), or the "q" parameter
   *   - yandex.ru: the "text" parameter
   *   - nigma.ru: the "s" parameter
   * Any "#fragment" trailing a parameter value is dropped. "+" characters are
   * left in place; parseQuery() treats them as spaces.
   *
   * @returns {string} the decoded, trimmed query, or "" when the URL carries
   *                   no recognized query (never undefined)
   */
  function getQuery(){
    var href = window.location.href;
    var host = location.hostname;
    var onGoogle = host.indexOf("google") > -1;
    var onYandex = host == "yandex.ru";
    var onNigma = host.indexOf("nigma.ru") > -1;

    // decodeURIComponent throws URIError on a malformed %-sequence (e.g. "100%");
    // fall back to the raw text rather than aborting the whole script.
    function decode(raw){
      try {
        return decodeURIComponent(raw).trim();
      } catch (err) {
        return raw.trim();
      }
    }

    var qa = href.substr(href.indexOf("?")+1).split("&");
    for (var j=qa.length-1; j>=0; j--){
      var part = qa[j];

      // Check the fragment form first: in "q=foo#q=bar" the page shows results for "bar"
      var hashq = part.indexOf("#q=");
      if (onGoogle && hashq > -1){ // usually a subsequent query
        return decode(part.substr(hashq+3));
      }

      // Any other fragment is not part of the parameter value
      var hash = part.indexOf("#");
      if (hash > -1) part = part.substr(0, hash);

      var eq = part.indexOf("=");
      if (eq < 0) continue;
      var key = part.substr(0, eq);
      var value = part.substr(eq+1);

      if ((key == "q" && onGoogle) ||      // usually the initial query
          (key == "text" && onYandex) ||   // Yandex
          (key == "s" && onNigma)){        // Nigma
        return decode(value);
      }
    }
    return ""; // no query found; callers test .length / call string methods
  }
  // Query state shared with THmo_doHighlight. getQuery() may produce no value when the
  // URL has no recognized query parameter; fall back to "" so that the string methods
  // used by parseQuery() below do not throw at load time.
  var currentquery = getQuery() || "", lastquery = currentquery;
  /**
   * Turn a raw query string into the list of terms to highlight.
   *
   * Each returned entry is either a single word, a quoted phrase (when
   * parsephrases is on and the query contains double quotes), or - when
   * hyphenoptions is on - a "|"-separated set of variants of a hyphenated
   * word: "user-script|user script|userscript". The "|" is later used as
   * regex alternation by THmo_doHighlight.
   *
   * "+" is treated as a space. Empty terms are never returned: an empty term
   * would become a pattern that matches at every position of every text node.
   * Exclusion terms (leading "-") are deliberately left in the list.
   *
   * @param {string} qry  decoded query text; anything that is not a string yields []
   * @returns {string[]}  terms in query order, possibly empty
   */
  function parseQuery(qry){
    if (typeof qry !== "string") return [];

    // Split unquoted text into words, adding hyphen variants where enabled.
    function splitWords(text){
      var pieces = text.replace(/[+\s]+/g, " ").trim().split(" ");
      var words = [];
      for (var i=0; i<pieces.length; i++){
        var word = pieces[i];
        if (word.length === 0) continue; // "".split(" ") yields [""]
        // indexOf > 0: a leading hyphen marks an exclusion, not a compound word
        if (hyphenoptions && word.indexOf("-") > 0){
          word += "|" + word.replace(/-/g, " ") + "|" + word.replace(/-/g, "");
        }
        words.push(word);
      }
      return words;
    }

    if (parsephrases === false || qry.indexOf('"') < 0){ // highlight individual words
      return splitWords(qry.replace(/"/g, ""));
    }

    // preserve phrases: after splitting on '"', odd-numbered segments were inside quotes
    var terms = [];
    var segments = qry.split('"');
    for (var k=0; k<segments.length; k++){
      if (k % 2 === 1){
        var phrase = segments[k].replace(/[+\s]+/g, " ").trim();
        if (phrase.length > 0) terms.push(phrase);
      } else {
        terms = terms.concat(splitWords(segments[k]));
      }
    }
    return terms;
  }
  // Keyword list for the query found at load time; THmo_doHighlight re-parses it
  // whenever the query read from the URL differs from the previous one.
  var qTerms = parseQuery(currentquery);
  
  // Main workhorse routine
  /**
   * Highlight the current query terms in all eligible text under `el`.
   *
   * Re-reads the query from the URL on every call and refreshes the shared
   * currentquery / lastquery / qTerms state when it changed. For each term
   * (at most one per entry in backcolors) the text nodes under `el` are
   * collected via XPath, skipping style/script/textarea/pre/code content,
   * div#extrares, and text that is already inside a highlight. Each matching
   * text node is replaced by a <thdfrag thdcontain="true"> wrapper holding the
   * unmatched text plus one <thdfrag gskh="N" style="background-color:..."> per
   * match; when the text node already sits in such a wrapper, the pieces are
   * inserted directly so wrappers do not nest.
   *
   * The replacement is built with DOM nodes rather than HTML strings, so page
   * text is never re-parsed as markup and keywords such as "lt" or "amp"
   * cannot match inside escaped entities.
   *
   * @param {Element} el  root element to search; used as the XPath context node
   */
  function THmo_doHighlight(el){
    // Update query parsing if necessary
    currentquery = getQuery();
    if (!currentquery) return; // covers both "" and a missing query
    if (currentquery != lastquery){
      lastquery = currentquery;
      qTerms = parseQuery(currentquery);
    }

    var rSpecials = /[-\/\\^$*+?.()[\]{}]/g; // regex metacharacters to neutralize in keywords
    var wrapperProto = document.createElement('thdfrag');
    wrapperProto.setAttribute("thdcontain","true");

    // Build a fragment with the text of one node split into plain and highlighted runs.
    // Returns null when the pattern does not occur in the text.
    function buildPieces(text, pat, colorIndex){
      var pieces = null, last = 0, m;
      pat.lastIndex = 0; // global regex: always start from the beginning of this text
      while ((m = pat.exec(text)) !== null){
        if (m[0].length === 0){ pat.lastIndex++; continue; } // defensive: never loop on an empty match
        if (pieces === null) pieces = document.createDocumentFragment();
        if (m.index > last) pieces.appendChild(document.createTextNode(text.slice(last, m.index)));
        var mark = document.createElement('thdfrag');
        mark.setAttribute('gskh', String(colorIndex));
        mark.setAttribute('style', 'background-color:' + backcolors[colorIndex]);
        mark.appendChild(document.createTextNode(m[0]));
        pieces.appendChild(mark);
        last = m.index + m[0].length;
      }
      if (pieces !== null && last < text.length){
        pieces.appendChild(document.createTextNode(text.slice(last)));
      }
      return pieces;
    }

    // Apply highlighting; one color per term, so stop when the palette runs out
    var termCount = Math.min(qTerms.length, backcolors.length);
    for (var j = 0; j < termCount; ++j) {
      // "|" separates the hyphen variants produced by parseQuery; escape each variant
      // on its own and drop empty ones so the pattern can never match "".
      var variants = String(qTerms[j]).split('|').filter(function(v){
        return v.length > 0;
      }).map(function(v){
        return v.replace(rSpecials, '\\$&');
      });
      if (variants.length === 0) continue;
      var pat = new RegExp('(' + variants.join('|') + ')', 'gi');

      // getting all text nodes with a few exceptions; re-evaluated per term because
      // the previous term's highlighting replaced some of the nodes
      var snapElements = document.evaluate(
        './/text()[normalize-space() != "" ' +
        'and not(ancestor::style) ' +
        'and not(ancestor::script) ' +
        'and not(ancestor::textarea) ' +
        'and not(ancestor::pre) ' +
        'and not(ancestor::code) ' +
        'and not(ancestor::div[@id="extrares"]) ' +
        'and not(parent::thdfrag[@gskh])]',
        el, null, XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, null);

      if (snapElements.snapshotLength === 0) { break; } // nothing to highlight under el

      for (var i = 0, len = snapElements.snapshotLength; i < len; i++) {
        var node = snapElements.snapshotItem(i);
        var parent = node.parentNode;
        if (!parent) continue;
        var pieces = buildPieces(node.nodeValue, pat, j);
        if (pieces === null) continue; // keyword not present in this text node

        if (parent.hasAttribute("thdcontain")) {
          // already inside a wrapper from an earlier term: do not nest another one
          parent.replaceChild(pieces, node);
        } else {
          var wrapper = wrapperProto.cloneNode(true);
          wrapper.appendChild(pieces);
          parent.replaceChild(wrapper, node);
        }
      }
    }
  }
  // first run: prefer the element with id "search"; otherwise scan the whole body
  var THmo_firstRoot = document.getElementById('search') || document.body;
  if (THmo_firstRoot) THmo_doHighlight(THmo_firstRoot);

  // Add MutationObserver to catch content added dynamically
  var THmo_MutOb = window.MutationObserver || window.WebKitMutationObserver;
  if (THmo_MutOb && document.body){
    var THmo_chgMon = new THmo_MutOb(function(mutationSet){
      mutationSet.forEach(function(mutation){
        for (var i=0; i<mutation.addedNodes.length; i++){
          var added = mutation.addedNodes[i];
          if (added.nodeType != 1) continue; // element nodes only
          // Skip the <thdfrag> elements inserted by the highlighter itself: their text
          // was already checked against every term by the call that created them, so
          // re-scanning them only repeats that work on each highlight.
          if (added.nodeName.toLowerCase() == 'thdfrag') continue;
          THmo_doHighlight(added);
        }
      });
    });
    // attach chgMon to document.body
    var opts = {childList: true, subtree: true};
    THmo_chgMon.observe(document.body, opts);
  }
})(); // end of anonymous function