// zombie / BookReaderView

// ==UserScript==
// @name        BookReaderView
// @namespace   com.vsubhash.js.BookReaderView
// @version     1
// @grant       none
// @copyright		V. Subhash, 2019 (https://gist.github.com/vsubhash)
// @license			GPL-3.0-or-later; http://www.gnu.org/licenses/gpl-3.0.txt
// ==/UserScript==

// Create the shared namespace object unless an earlier script already did.
if (subhash_browser_js == null) {
  var subhash_browser_js = {};
}

// BookReaderView: walks the page's <body>, copies a whitelisted subset of
// its elements and text into an HTML string (sHtml), and finally replaces
// the document with that simplified markup.
//
// Methods always address the object through its full global name instead of
// "this", so they keep working when passed around unbound (for example as
// an event listener).
subhash_browser_js.book_reader_js = {
	// HTML of the reader page, accumulated while the DOM is walked.
	sHtml: "",
	// Set to true by isTitleTag() at the first h1/h2/h3; nothing is copied
	// into sHtml before that point.
	bTitleFound: false,
	// Lower-case fragments; an element whose class or id contains one of
	// them is treated as unwanted ("yucky").
	arYucks: [ "-ads", "_ads", "advert", "adcode", "adselect", "addthis", "alsoread", "comment",
	           "discuss", "email", "facebook", "float", "follow", "franchise", "googlead",
	           "hide_", "hidden", "hover", "jump", "lazy", "linkedin", "navig", "notifi",
	           "outbrain", "partner", "popular", "popup", "print", "reddit", "share", "sharing",
	           "short-url", "social", "sponsor", "sprite", "subscribe", "taboola", "trend",
	           "twitter", "url-short", "zipr" ],

	// Escapes a value for safe use as HTML text or inside a double-quoted
	// attribute. null/undefined become the empty string.
	escapeHtml: function(asText) {
		if (asText == null) { return(""); }
		return(String(asText)
			.replace(/&/g, "&amp;")
			.replace(/</g, "&lt;")
			.replace(/>/g, "&gt;")
			.replace(/"/g, "&quot;")
			.replace(/'/g, "&#39;"));
	},

	// Resets sHtml to the <head> (title + reader stylesheet) and the opening
	// <body> tag of the reader page.
	createHeader: function() {
		var oBrv = subhash_browser_js.book_reader_js;
		oBrv.sHtml = "<head>\n" +
			"\t<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n" +
			// The title is plain text; escape it so "<" or "&" cannot break the markup.
			"\t<title>" + oBrv.escapeHtml(document.title) + "</title>\n" +
			"\t<style>\n" +
			" a { border-bottom: 1px dotted navy; }\n" +
			" body { background-color: rgb(200,200,220); color: black; font-family: sans-serif;\n" +
			"        font-size: 0.5cm; margin: 1em auto; padding: 1em; max-width: 9in; }\n" +
			" code { font-family: monospace; }\n" +
			" h1 { text-align: center; border-bottom: 1px solid black; padding-bottom: 0.2em; }\n" +
			" a h1, a h2, a h3, a h4, a h5, a h6, h1 a, h2 a, h3 a, h4 a, h5 a, h6 a\n" +
			"       { color: black; border-bottom: 1px dotted black; }\n" +
			" pre, figure { margin: 1em auto; padding: 1em;  }\n" +
			" img { display: block; margin: 1em auto; max-height: 40%; max-width: 40%; }\n" +
			" img[src*='.svg'] { display: none!important; }\n" +
			" figcaption { font-weight: bold; font-size: 0.8em; text-align: center; }\n" +
			" header, footer, aside, nav { display: none; }\n" +
			"\t</style>\n" +
			"</head>\n" +
			"<body>\n";
	},

	// Removes every aside/footer/iframe/nav/noscript/script element from the
	// live document.
	removeUnwantedTags: function() {
		var arTagsToRemove = [ "aside", "footer", "iframe", "nav", "noscript", "script" ];
		for (var i = 0; i < arTagsToRemove.length; i++) {
			var arElements = document.getElementsByTagName(arTagsToRemove[i]);
			// Walk backwards: descendants come later in document order, so they
			// are removed before their ancestors and indexes stay valid.
			for (var j = arElements.length - 1; j >= 0; j--) {
				arElements[j].parentNode.removeChild(arElements[j]);
			}
		}
	},

	// Adds a stylesheet to the page that hides (display: none) every element
	// whose class or id contains one of the arYucks fragments, so that
	// parseElement() skips them.
	addNoYuckiesStyle: function() {
		var oBrv = subhash_browser_js.book_reader_js;
		if (oBrv.arYucks.length == 0) { return; }
		var arSelectors = [];
		for (var i = 0; i < oBrv.arYucks.length; i++) {
			arSelectors.push("*[class*=\"" + oBrv.arYucks[i] + "\"], *[id*=\"" + oBrv.arYucks[i] + "\"]");
		}
		// Append a real <style> node instead of doing "innerHTML +=", which
		// would serialise and re-parse (i.e. rebuild) the whole body.
		var oStyle = document.createElement("style");
		oStyle.textContent = arSelectors.join(",\n") + " { display: none!important; }";
		document.getElementsByTagName("body")[0].appendChild(oStyle);
	},

	// Appends the markup for an element that has no element children.
	// Nothing is written before the title has been found, for "yucky"
	// elements, or for tags outside the isUsefulTag() whitelist.
	parseFiniteElement: function(aoEl) {
		var oBrv = subhash_browser_js.book_reader_js;
		var sElTag = aoEl.tagName.toLowerCase();
		oBrv.isTitleTag(sElTag);
		if (!oBrv.bTitleFound || !oBrv.hasNoYuckiness(aoEl)) { return; }
		if (!oBrv.isUsefulTag(sElTag)) { return; }

		// Inline tags that are normalised to a smaller set of reader tags.
		var oInlineWrap = {
			"b": "b", "em": "b", "strong": "b",
			"cite": "i", "i": "i", "time": "i",
			"ins": "u", "kbd": "u", "mark": "u", "u": "u",
			"s": "s", "strike": "s",
			"code": "code", "samp": "code", "var": "code",
			"sub": "sub", "sup": "sup"
		};
		// textContent is plain text, so it must be escaped before it is
		// concatenated into markup.
		var sText = oBrv.escapeHtml(aoEl.textContent);

		if (sElTag == "a") {
			// Use the raw attribute: the .href property is the resolved absolute
			// URL and therefore never starts with "#".
			var sHref = aoEl.getAttribute("href");
			if (!sHref || sHref.indexOf("#") == 0) {
				// No target, or an in-page fragment link: keep only the text.
				oBrv.sHtml += sText;
			} else {
				oBrv.sHtml += "<a href=\"" + oBrv.escapeHtml(sHref) + "\">" + sText + "</a>";
			}
		} else if (sElTag == "abbr") {
			var sTitle = aoEl.getAttribute("title");
			// Spell out the expansion only when the abbreviation has one.
			oBrv.sHtml += sText + (sTitle ? " (" + oBrv.escapeHtml(sTitle) + ") " : "") + "\n";
		} else if (sElTag == "br") {
			oBrv.sHtml += "<br />";
		} else if (sElTag == "img") {
			var sSrc = aoEl.getAttribute("src");
			if (sSrc) {
				oBrv.sHtml += "<img src=\"" + oBrv.escapeHtml(sSrc) + "\" />";
			}
		} else if ((sElTag == "label") || (sElTag == "span") || (sElTag == "wbr")) {
			oBrv.sHtml += sText;  // keep the text, drop the tag
		} else if (Object.prototype.hasOwnProperty.call(oInlineWrap, sElTag)) {
			oBrv.sHtml += "<" + oInlineWrap[sElTag] + ">" + sText + "</" + oInlineWrap[sElTag] + ">";
		} else {
			// Headings, p, figcaption and the remaining whitelisted containers
			// (li, pre, ...) keep their own tag.
			oBrv.sHtml += "<" + sElTag + ">" + sText + "</" + sElTag + ">";
		}
	},

	// Returns true when asTag (lower-case tag name) may be copied into the
	// reader page.
	isUsefulTag: function(asTag) {
		var arTags = [ "a", "b", "i", "s", "u", "abbr", "article", "br", "code",
		               "cite", "em", "figure", "figcaption", "h1", "h2", "h3", "h4", "h5",
		               "h6", "img", "ins", "kbd", "label", "li", "main", "mark", "ol",
		               "p", "pre", "samp", "strike", "sub", "sup", "span",
		               "strong", "time", "ul", "var", "wbr" ];
		return(arTags.indexOf(asTag) > -1);
	},

	// Marks the title as found when asTag is the first h1/h2/h3 seen.
	// Returns the (possibly updated) bTitleFound flag.
	isTitleTag: function(asTag) {
		var oBrv = subhash_browser_js.book_reader_js;
		if (!oBrv.bTitleFound &&
				((asTag == "h1") || (asTag == "h2") || (asTag == "h3"))) {
			oBrv.bTitleFound = true;
		}
		return(oBrv.bTitleFound);
	},

	// Returns true when asValue is a string that contains, ignoring case,
	// one of the arYucks fragments.
	containsYuck: function(asValue) {
		if ((typeof asValue != "string") || (asValue.length == 0)) { return(false); }
		var arYucks = subhash_browser_js.book_reader_js.arYucks;
		var sValue = asValue.toLowerCase();
		for (var i = 0; i < arYucks.length; i++) {
			if (sValue.indexOf(arYucks[i]) > -1) { return(true); }
		}
		return(false);
	},

	// Returns false when the node's class or id contains an arYucks
	// fragment, true otherwise. A className that is not a string is ignored.
	hasNoYuckiness: function(aoNode) {
		var oBrv = subhash_browser_js.book_reader_js;
		if (oBrv.containsYuck(aoNode.className)) { return(false); }
		if (aoNode.getAttribute && oBrv.containsYuck(aoNode.getAttribute("id"))) {
			return(false);
		}
		return(true);
	},

	// Copies an element that has element children: writes its opening tag
	// (if allowed), then its child elements and text nodes, then the
	// matching closing tag.
	parseNode: function(aoNode) {
		var oBrv = subhash_browser_js.book_reader_js;
		var sTag = aoNode.nodeName.toLowerCase();
		oBrv.isTitleTag(sTag);
		// Remember whether the opening tag was written. bTitleFound can flip to
		// true while the children are processed, so re-evaluating the condition
		// afterwards would emit a closing tag that has no opening tag.
		var bTagOpened = oBrv.bTitleFound &&
				oBrv.isUsefulTag(sTag) &&
				oBrv.hasNoYuckiness(aoNode);
		if (bTagOpened) {
			if (sTag == "a" && (aoNode.href)) {
				oBrv.sHtml += "<" + sTag +
						" href=\"" + oBrv.escapeHtml(aoNode.href) + "\">";
			} else {
				oBrv.sHtml += "<" + sTag + ">";
			}
		}
		for (var i = 0; i < aoNode.childNodes.length; i++) {
			var oChild = aoNode.childNodes[i];
			oBrv.isTitleTag(oChild.nodeName.toLowerCase());
			if (oChild.nodeType == Node.ELEMENT_NODE) {
				oBrv.parseElement(oChild);
			} else if (oChild.nodeType == Node.TEXT_NODE) {
				if (oBrv.bTitleFound) {
					// nodeValue is plain text; escape it before adding it to markup.
					oBrv.sHtml += oBrv.escapeHtml(oChild.nodeValue);
				}
			}
		}
		if (bTagOpened) {
			oBrv.sHtml += "</" + sTag + ">";
		}
	},

	// Entry point for one element: skips it when its computed display is
	// "none", otherwise hands it to parseNode() (has element children) or
	// parseFiniteElement() (leaf, only once the title has been found).
	parseElement: function(aoEl) {
		var oBrv = subhash_browser_js.book_reader_js;
		var oComputed = window.getComputedStyle(aoEl);
		if (oComputed && (oComputed.getPropertyValue("display") == "none")) {
			// Logging is best-effort; a failure here must not stop the walk.
			try { console.error("Ignoring hidden element: " +
					aoEl.outerHTML.substring(0, 300)); } catch (e) {}
			return;
		}
		oBrv.isTitleTag(aoEl.tagName.toLowerCase());
		if (aoEl.children.length > 0) {
			oBrv.parseNode(aoEl);
		} else if (oBrv.bTitleFound) {
			oBrv.parseFiniteElement(aoEl);
		}
	},

	// Runs the whole conversion and replaces the document's content with the
	// reader page. Errors are logged, not rethrown.
	changeToReader: function() {
		var oBrv = subhash_browser_js.book_reader_js;
		try {
			oBrv.addNoYuckiesStyle();
			oBrv.removeUnwantedTags();
			oBrv.createHeader();
			oBrv.parseElement(document.getElementsByTagName("body")[0]);
			if (!oBrv.bTitleFound) {
				// Without an h1/h2/h3 nothing was collected; replacing the page
				// would leave it blank, so keep the page as it is.
				console.error("Subhash Browser BRV Error: no h1/h2/h3 heading found; page left unchanged");
				return;
			}
			oBrv.sHtml += "</body>\n";
			document.getElementsByTagName("html")[0].innerHTML = oBrv.sHtml;
		} catch (e) {
			console.error("Subhash Browser BRV Error: " + e);
		}
	},

	// Event handler: schedules changeToReader() to run 5 seconds later.
	handle_DOMLoaded: function() {
		try {
			window.setTimeout(
				function() {
					subhash_browser_js.book_reader_js.changeToReader();
				},
				5*1000);
		} catch (e) {
			console.error("Subhash Browser BRV Error: " + e);
		}
	}
}

// Start the conversion once the DOM is available. If this script is injected
// after DOMContentLoaded has already fired (this depends on the userscript
// manager's run-at setting), a listener registered now would never be
// called, so invoke the handler directly in that case.
if (document.readyState == "loading") {
  document.addEventListener
    ("DOMContentLoaded", subhash_browser_js.book_reader_js.handle_DOMLoaded, false);
} else {
  subhash_browser_js.book_reader_js.handle_DOMLoaded();
}