// NOTICE: By continued use of this site you understand and agree to the binding Terms of Service and Privacy Policy.
// ==UserScript==
// @name        FA Gallery Scraper
// @namespace   Artex
// @description Retrieves the sources to the submissions in a gallery
// @include     https://www.furaffinity.net/favorites/*
// @include     https://www.furaffinity.net/scraps/*
// @include     https://www.furaffinity.net/gallery/*
// @include     http://www.furaffinity.net/favorites/*
// @include     http://www.furaffinity.net/scraps/*
// @include     http://www.furaffinity.net/gallery/*
// @run-at      document-end
// @version     1.3.9
// @homepage    https://www.furaffinity.net/user/artex./
// @license     GPL-3.0
// @grant       none
// ==/UserScript==

/*
to do:
- handle bad requests - done, largely untested.
- look into saving images to disk - doesn't look to be very do-able without making an extension.
- add a pause/resume button (easy enough)
*/

// ---- shared scraper state ----
var sources = null;           // collected results; assigned by fetchPage when a run completes
var submissions = [];         // submission page urls gathered from the gallery pages
var failed = [];              // submission urls whose request failed and should be retried
var debugEnabled = false;     // when true, log() forwards to console.log
var downloadPages = [1, 1];   // [first, last] page to scrape, ex: 1,5 = pages 1-5
var pageNum = downloadPages[0]; // gallery page currently being downloaded
var submissionsComplete = 0;  // number of submissions downloaded
var currentPage;              // gallery document being scraped
var downloadAll = false;      // scrape until the end of the gallery is found
var downloadMetadata = false; // collect tags/title/etc. instead of bare urls
var startMode = true;         // true: start button starts a run; false: a run is active
var menuOpen = false;         // true while the download menu is shown
var masterDiv = null;         // root element of the download menu, null when closed

/**
 * Restores the scraper state to its initial values.
 *
 * `debugEnabled` and `masterDiv` are deliberately left alone: the first is a
 * developer switch and the second is owned by the menu code.
 */
function resetGlobals() {
    sources = null;
    submissions = [];
    failed = [];
    downloadPages = [1, 1];
    pageNum = downloadPages[0];
    submissionsComplete = 0;
    currentPage = undefined;
    downloadAll = false;
    startMode = true;
    downloadMetadata = false;
    menuOpen = false;
}

/**
 * Debug logger: forwards all of its arguments to console.log, but only while
 * `debugEnabled` is true.
 */
function log() {
    if (debugEnabled === true) {
        console.log.apply(console, arguments);
    }
}

//sets the status text of the scraping process.
// Number of retry rounds already spent on failed submission requests in the current run.
var retryRounds = 0;
// Upper bound on retry rounds, so a submission that always fails cannot keep the run alive forever.
var MAX_RETRY_ROUNDS = 3;

/**
 * Looks up an element inside the download menu.
 *
 * @param {string} selector - CSS selector passed to masterDiv.querySelector.
 * @returns {?Element} the match, or null when the menu is closed or nothing matches.
 */
function findInMenu(selector) {
    if (!masterDiv) {
        // The menu can be closed while requests are still in flight.
        return null;
    }
    return masterDiv.querySelector(selector);
}

/**
 * Sets the status text of the scraping process.
 *
 * @param {string} str - text to show in #statusText.
 */
function setStatus(str) {
    var statusText = findInMenu("#statusText");
    if (statusText !== null) {
        statusText.textContent = str;
    } else {
        log("attempt to set status without statusText");
    }
}

/**
 * Sets the width of the progress bar fill.
 *
 * @param {number} percent - fraction of the work done (1 means 100%).
 */
function setProgress(percent) {
    var statusFill = findInMenu("#statusFill");
    if (statusFill === null) {
        log("attempt to set progress without statusFill");
        return;
    }
    statusFill.style.width = (100 * percent) + "%";
}

/**
 * Replaces the content of the #output box and scrolls it to the bottom.
 *
 * @param {*} source - text to show, or any value to serialise when `json` is truthy.
 * @param {boolean} [json] - when truthy, `source` is written as indented JSON.
 */
function writeToOutput(source, json) {
    var output = findInMenu("#output");
    if (output === null) {
        log("attempt to write output without output element");
        return;
    }
    output.textContent = json ? JSON.stringify(source, null, 4) : source;
    output.scrollTop = output.scrollHeight;
}

/**
 * Appends every submission link found on `currentPage` to the global
 * `submissions` list.
 *
 * @returns {string[]} the global `submissions` array (all pages so far).
 */
function getSubmissions() {
    var pageHtml = currentPage.documentElement.innerHTML;
    var viewLink = /<a href="(\/view\/(.+?)\/)">/g;
    var match;
    while ((match = viewLink.exec(pageHtml)) !== null) {
        submissions.push(match[1]);
    }
    return submissions;
}

/**
 * Retrieves the submission file url from a submission page document.
 *
 * @param {Document} page - submission page.
 * @returns {?string} the file url, or null when the page has no download link.
 */
function getSubmissionSource(page) {
    var downloadButton = page.getElementsByClassName("download-logged-in")[0];
    if (downloadButton) {
        //beta layout
        return downloadButton.href;
    }
    //classic layout: the link is protocol-relative, hence the 'http:' prefix
    var pageHtml = page.documentElement.innerHTML;
    var regExDownloadLink = new RegExp('<a href="(.*/art/.*/(?:stories/|poetry/)?.*/.*)">Download</a>');
    var found = pageHtml.match(regExDownloadLink);
    return found === null ? null : 'http:' + found[1];
}

/**
 * Collects the tags plus category/species/gender/rating info of a submission.
 * Missing page sections are skipped instead of throwing.
 *
 * @param {Document} page - submission page.
 * @returns {Array} a pair: [tags, category], both arrays of strings.
 */
function getTagsFromSubmission(page) {
    var tags = [];
    var tagEls = page.getElementsByClassName("tags");
    for (var t = 0; t < tagEls.length; t++) {
        var firstNode = tagEls[t].firstChild;
        tags.push(firstNode ? firstNode.textContent : "");
    }

    //also collect category, species, and gender info
    var category = [];
    var infoSection = page.getElementsByClassName("sidebar-section-no-bottom")[0]; //if this changes again... (ノಠ益ಠ)ノ彡┻━┻
    if (infoSection) {
        var labels = infoSection.getElementsByTagName("strong");
        for (var c = 0; c < labels.length; c++) {
            var valueNode = labels[c].nextSibling;
            var value = valueNode ? valueNode.textContent : "";
            category.push(labels[c].textContent + value.replace("|", ""));
        }
    }

    //get rating
    var ratingBox = page.getElementsByClassName("rating-box")[0];
    if (ratingBox) {
        category.push("Rating: " + ratingBox.textContent.replace("\n", ""));
    }
    return [tags, category];
}

/**
 * Requests every submission in the `failed` list again, adding the results to
 * the existing `sources` collection. Does nothing when the run was cancelled.
 */
function retryFailed() {
    if (startMode === true) {
        return;
    }
    var pending = failed.slice();
    // Clear before fetching so failures of this round are recorded in a fresh list.
    failed = [];
    fetchPage(pending, 0, sources);
}

/**
 * Called when a submission list has been fully processed. Schedules a retry
 * round while there are failures and rounds left; otherwise writes the final
 * output and status.
 */
function downloadComplete() {
    if (failed.length > 0 && retryRounds < MAX_RETRY_ROUNDS) {
        retryRounds += 1;
        setStatus("Retrying failed submissions,");
        window.setTimeout(retryFailed, 3000);
        return;
    }
    if (Array.isArray(sources)) {
        writeToOutput(sources.join("\n"));
    } else {
        writeToOutput(sources, true);
    }
    setStatus(failed.length > 0 ? "Done (" + failed.length + " failed)" : "Done");
}

/**
 * Builds the metadata record stored for one submission in metadata mode.
 *
 * @param {Document} page - submission page.
 * @param {string} source - file url returned by getSubmissionSource.
 * @param {string} submissionUrl - url of the submission page.
 * @returns {Object} record with image, submission, tags, category, artist, description, title.
 */
function buildSubmissionMetadata(page, source, submissionUrl) {
    var tags = getTagsFromSubmission(page);
    var artistMatch = source.match(/art\/([^\/]+)\//);
    var titleEl = page.getElementsByClassName("submission-title")[0];
    var description = "";
    var title = "";
    if (titleEl) {
        if (titleEl.nextSibling && titleEl.nextSibling.textContent) {
            description = titleEl.nextSibling.textContent;
        }
        var header = titleEl.getElementsByClassName("submission-title-header")[0];
        if (header) {
            title = header.innerText;
        }
    }
    return {
        image: decodeURIComponent(source),
        submission: submissionUrl, //could provide id or url. using url for now.
        tags: tags[0],
        category: tags[1],
        artist: artistMatch ? artistMatch[1] : "",
        description: description,
        title: title
    };
}

/**
 * Stores one successfully resolved submission in `results` and echoes it to
 * the output box. Arrays receive the bare url; objects receive a metadata
 * record keyed by the decoded file name.
 */
function recordSource(results, page, source, submissionUrl) {
    if (Array.isArray(results)) {
        // push (not results[num]) so failed requests leave no holes and
        // retried submissions do not overwrite earlier entries.
        results.push(source);
        writeToOutput(results.join("\n"));
        return;
    }
    var nameMatch = source.match(/[^\/]+$/);
    var fileName = decodeURIComponent(nameMatch ? nameMatch[0] : source);
    var metadata = buildSubmissionMetadata(page, source, submissionUrl);
    log(fileName);
    results[fileName] = metadata;
    writeToOutput(metadata.image);
}

/**
 * Requests submission pages one after another, starting at index `num`, and
 * adds each submission's source to the collector. When the end of the list is
 * reached the collector is published as `sources` and downloadComplete() runs
 * exactly once.
 *
 * @param {string[]} submissions - submission page urls.
 * @param {number} num - index of the submission to request next.
 * @param {(Array|Object)} [collector] - existing results to extend; when falsy
 *     a new object (metadata mode) or array is created.
 */
function fetchPage(submissions, num, collector) {
    var results = collector || (downloadMetadata ? {} : []);

    if (num >= submissions.length) {
        // Nothing left to request (also covers an empty list).
        sources = results;
        downloadComplete();
        return;
    }

    var submissionUrl = submissions[num];
    log("fetching submission:", submissionUrl);

    var xhr = new XMLHttpRequest();
    xhr.addEventListener("loadend", function() {
        if (startMode === true) {
            setStatus("Cancelled");
            if (downloadMetadata) {
                writeToOutput(sources, true);
            }
            return;
        }

        var page = xhr.status === 200 ? xhr.responseXML : null;
        var source = page ? getSubmissionSource(page) : null;
        if (source === null) {
            //bad request or page without a download link, add to fail list D:
            setStatus("Request Failed: " + submissionUrl);
            failed.push(submissionUrl);
        } else {
            recordSource(results, page, source, submissionUrl);
            setStatus("Getting Submission Source:" + (num + 1) + "/" + submissions.length);
            setProgress((num + 1) / submissions.length);
        }

        // Continue with the next index; the call past the end finalises the run.
        fetchPage(submissions, num + 1, results);
    });
    xhr.open("GET", submissionUrl);
    xhr.responseType = "document";
    xhr.send();
}

/**
 * Tells whether a gallery page is past the end of the gallery.
 *
 * @param {Document} page - gallery page.
 * @returns {boolean} true if an element with id 'no-images' is found on the page.
 */
function isLastPage(page) {
    return page.getElementById("no-images") !== null;
}

/**
 * Builds the address of gallery page `page` from the current address: a
 * trailing "/<digits>" segment is replaced, otherwise the page number is
 * appended after exactly one slash.
 *
 * @param {string} baseUrl - address of the gallery being viewed.
 * @param {(number|string)} page - page number to request.
 * @returns {string} address of the requested gallery page.
 */
function buildGalleryPageUrl(baseUrl, page) {
    var pageSuffix = /(\/\d+\/*)$/;
    if (pageSuffix.test(baseUrl)) {
        return baseUrl.replace(pageSuffix, "/" + page);
    }
    return baseUrl.replace(/\/*$/, "/") + page;
}

/**
 * Starts resolving the sources of every gathered submission, with a fresh
 * retry budget.
 */
function startSourceDownload() {
    retryRounds = 0;
    log("pageNum:", pageNum);
    log("submissions found:", submissions.length);
    fetchPage(submissions, 0, sources);
}

/**
 * Scrapes gallery page `pageNum`, then moves on to the next page until the
 * last requested page (or the end of the gallery) is reached, and finally
 * starts resolving the submission sources. A failed page request is retried
 * after 5 seconds.
 */
function getGallerySubmissions() {
    if (pageNum < (+downloadPages[1] + 1)) {
        var nextPage = buildGalleryPageUrl(window.location.href, pageNum);
        log("Next Page:", nextPage);

        if (startMode === true) {
            log("cancelled page scrape");
            setStatus("Cancelled");
            return;
        }

        setStatus("Scraping page: " + pageNum + "/" + (downloadAll ? "?" : downloadPages[1]));
        if (downloadAll === false) {
            setProgress(pageNum / downloadPages[1]);
        }

        var xhr = new XMLHttpRequest();
        xhr.addEventListener("loadend", function() {
            if (startMode === true) {
                // Stopped while the request was in flight: leave the reset state untouched.
                log("cancelled page scrape");
                setStatus("Cancelled");
                return;
            }

            var page = xhr.status === 200 ? xhr.responseXML : null;
            if (page === null) {
                //bad request, try again.
                setStatus("Failed to get page " + pageNum + " trying again in 5 seconds.");
                // Pass the function itself; calling it here would retry immediately.
                window.setTimeout(getGallerySubmissions, 5000);
                return;
            }

            currentPage = page;
            //got all submission links, start getting sources
            if (isLastPage(currentPage) === true) {
                startSourceDownload();
                return;
            }

            pageNum = +pageNum + 1;
            getSubmissions();
            writeToOutput(submissions.join("\n"));
            getGallerySubmissions();
        });
        xhr.open("GET", nextPage);
        xhr.responseType = "document";
        xhr.send();
    } else {
        //done
        startSourceDownload();
    }
}

/**
 * Experimental download function (doesn't work): clicks every anchor found
 * inside the output element.
 */
function downloadLinksFromOutput() {
    var output = findInMenu("#output");
    if (output === null) {
        return;
    }
    var links = output.getElementsByTagName("a");
    for (var i = 0; i < links.length; i++) {
        links[i].click();
    }
}

//recursively scrapes gallery pages list and runs the show.
/**
 * Entry point of a download run: starts scraping the gallery pages.
 */
function scrapeGallery() {
    getGallerySubmissions();
}

/**
 * Turns the raw values of the page inputs into a usable [first, last] range.
 *
 * @param {string} startValue - value of the "from" input.
 * @param {string} endValue - value of the "to" input.
 * @param {boolean} allPagesChecked - when true the inputs are ignored.
 * @returns {number[]} [first, last]; [1, 99999] for "all pages" (scraping stops
 *     when the end of the gallery is found). An empty or invalid start falls
 *     back to 1, an empty or too small end falls back to the start page.
 */
function parsePageRange(startValue, endValue, allPagesChecked) {
    if (allPagesChecked) {
        return [1, 99999];
    }
    var first = parseInt(startValue, 10);
    if (isNaN(first) || first < 1) {
        first = 1;
    }
    var last = parseInt(endValue, 10);
    if (isNaN(last) || last < first) {
        last = first;
    }
    return [first, last];
}

//THE UI CODE ¯\_(ツ)_/¯

/**
 * Builds the download menu, appends it to the page and wires up its controls.
 * The menu root is stored in the global `masterDiv`.
 */
function downloadMenu() {
    masterDiv = document.createElement("div"); //div master race
    masterDiv.setAttribute("id", "Master");
    var style = document.createElement("style");
    style.setAttribute("scoped", "");

    var subjects = [ //masterDiv's 'loyal' subjects
        '<div id="title">Download Gallery <div id="close">✕</div></div>',
        '<div class="divide"></div>',
        '<div id="pages">',
        ' <label for="pageStart">Pages:</label>',
        ' <input type="number" id="pageStart"> to <input type="number" id="pageEnd">',
        ' <input type="checkbox" id="allPages"><label for="allPages"> All Pages</label>',
        '</div>',
        '<input type="button" id="start" value="Start Download">',
        '<input type="checkbox" id="metadata"><label for="metadata"> Metadata</label>'
    ];
    var css = [
        '#Master {',
        ' position: fixed;',
        ' top: 50%;',
        ' left: 50%;',
        ' transform: translate(-50%, -50%);',
        ' display: inline-block;',
        ' z-index: 999;',
        ' background-color: rgba(255, 255, 255, 0.9);',
        ' padding: 20px;',
        ' color: #4D4D4D;',
        ' font-family: "segoe ui";',
        ' transition: height 0.3s;',
        ' -webkit-transition: height 0.3s;',
        '}',
        '#title {',
        ' position: relative;',
        ' text-align: center;',
        ' width: 100%;',
        ' top: -10px;',
        '}',
        '#close {',
        ' float: right;',
        '}',
        '#close:hover {',
        ' cursor: pointer;',
        '}',
        '.divide {',
        ' border-bottom: 1px solid rgba(200,200,200, 0.5);',
        ' width: 100%;',
        ' margin-bottom: 10px;',
        '}',
        '#start {',
        ' display: block;',
        ' margin: 0 auto;',
        ' margin-top: 10px;',
        '}',
        '#metadata {',
        ' /*float: left;*/',
        '}',
        '#statusBar {',
        ' width: 100%;',
        ' height: 25px;',
        ' background-color: rgba(200,200,200,0.5);',
        ' margin-bottom: 5px;',
        '}',
        '#statusText {',
        ' text-align: center;',
        '}',
        '#statusFill {',
        ' transition: width 0.2s;',
        ' -webkit-transition: width 0.2s;',
        ' height: 100%;',
        ' background-color: #4DA9B9;',
        '}',
        '#output {',
        ' width: 100%;',
        ' max-width: 800px;',
        ' height: 200px;',
        ' overflow: scroll;',
        ' border-radius: 0 !important;',
        ' color: #4D4D4D;',
        ' background-color: rgba(200,200,200,0.5) !important;',
        '}'
    ];

    masterDiv.innerHTML = subjects.join("");
    style.innerHTML = css.join("");
    document.body.appendChild(masterDiv);
    masterDiv.appendChild(style);

    // hook up buttons and input
    var closeWindow = masterDiv.querySelector("#close");
    var pageStart = masterDiv.querySelector("#pageStart");
    var pageEnd = masterDiv.querySelector("#pageEnd");
    var allPages = masterDiv.querySelector("#allPages");
    var metadataButton = masterDiv.querySelector("#metadata");
    var start = masterDiv.querySelector("#start");

    // Copies what the controls currently show into the scraper state, so a run
    // always starts from (and a stop always returns to) a consistent state.
    function syncSettingsFromInputs() {
        downloadAll = allPages.checked;
        downloadMetadata = metadataButton.checked;
        downloadPages = parsePageRange(pageStart.value, pageEnd.value, downloadAll);
        pageNum = downloadPages[0];
    }

    closeWindow.addEventListener("click", function() {
        document.body.removeChild(masterDiv);
        resetGlobals();
        masterDiv = null;
    });

    //update downloadPages
    pageStart.addEventListener("input", function() {
        downloadPages = parsePageRange(pageStart.value, pageEnd.value, false);
        pageNum = downloadPages[0];
    });
    pageEnd.addEventListener("input", function() {
        downloadPages[1] = parsePageRange(pageStart.value, pageEnd.value, false)[1];
        log(downloadPages[1]);
    });

    //disable other inputs and prepare downloader to incrementally grab pages until end of gallery
    allPages.addEventListener("click", function() {
        if (allPages.checked === true) {
            pageStart.setAttribute("disabled", "");
            pageEnd.setAttribute("disabled", "");
        } else {
            pageStart.removeAttribute("disabled");
            pageEnd.removeAttribute("disabled");
        }
        downloadAll = allPages.checked;
        downloadPages = parsePageRange(pageStart.value, pageEnd.value, downloadAll);
    });

    //Get tags, submission page, and submission file in JSON format.
    metadataButton.addEventListener("click", function() {
        downloadMetadata = metadataButton.checked;
    });

    start.addEventListener("click", function() {
        if (startMode === true) {
            //start download
            var statusDiv = masterDiv.querySelector("#status");
            if (statusDiv === null) {
                statusDiv = document.createElement("div");
                statusDiv.setAttribute("id", "status");
                var html = [
                    '<span id="statusText">status</span>',
                    '<div id="statusBar">',
                    ' <div id="statusFill" width=0></div>',
                    '</div>',
                    '<textarea id="output" spellcheck="false" wrap="off" readonly style="background-color: rgba(195, 195, 195, 0.51) ! important;"></textarea>'
                ];
                statusDiv.innerHTML = html.join("");
                masterDiv.appendChild(statusDiv);
            }
            syncSettingsFromInputs();
            start.setAttribute("value", "Stop Download");
            startMode = false;
            scrapeGallery();
        } else {
            // stop download
            resetGlobals();
            // resetGlobals() also clears menuOpen and the settings, but the menu
            // is still on screen: restore them from what the controls show.
            menuOpen = true;
            syncSettingsFromInputs();
            start.setAttribute("value", "Start Download");
        }
    });
}

/**
 * Adds the "Download Gallery" button to the page; clicking it opens the
 * download menu unless one is already open. Does nothing when neither anchor
 * element is present on the page.
 */
function insertButton() {
    var insertAt = document.getElementsByClassName("user-profile-options")[0] ||
        document.getElementsByClassName('tab')[0];
    log(insertAt);
    if (!insertAt) {
        log("no place found to insert the download button");
        return;
    }

    var button = document.createElement("input");
    button.type = "button";
    button.value = "Download Gallery";
    var buttonCSS = [
        "height: 100%;",
        "background: none;",
        "font-size: inherit;",
        "border: none;",
        "color: inherit;",
        "font-family: inherit;",
        "padding: 0 15px;"
    ];
    button.setAttribute("style", buttonCSS.join(""));
    insertAt.appendChild(button);

    button.addEventListener("click", function() {
        if (menuOpen === false) {
            downloadMenu();
            menuOpen = true;
        }
    });
}

// Only bootstrap when a DOM is available, so the file can also be loaded
// outside a browser page.
if (typeof document !== "undefined") {
    insertButton();
}