NOTICE: By continued use of this site you understand and agree to the binding Terms of Service and Privacy Policy.
// ==UserScript== // @name msg_parser-vk.com // @namespace Parser For Vk.com Messages // @match https://vk.com/im // @match https://vk.com/im?sel=* // @grant GM_listValues // @grant GM_setValue // @grant GM_getValue // @license MIT // @run-at document-end // @version 1.0 // @author suffermuffin // @description 23.08.2023, 17:23:42 // ==/UserScript== const forbiddenChats = ['https://vk.com/im?sel=000000', 'https://vk.com/im?sel=111111'] // Personal chats, that won't be parsed const separator = '⍼' // Separator that will be used for generating CSV file const startingChat = 0 // First chat to parse from above const chatNum = 10 // Number of chats to be parsed const msgDepth = 5 // How deep into chat should program parse const scrollAwaitTime = 1000 // Timeout between scrolls, might be useful if internet connection is weak const scrollRetryAttempts = 4 // Number of checks before calling "Reached Top of Chat" state class vkParser{ constructor(chatNum, msgDepth){ this.chatNum = chatNum; this.msgDepth = msgDepth; this.msId = 0; this.chatWin = document.getElementsByClassName("ui_scroll_outer")[2]; this.pplWin = document.getElementsByClassName("ui_scroll_outer")[1]; } /** Makes 1 scroll to (0,0) coordinates, before the coordinate grip updates */ async scroll(){ const pr = new Promise((resolve, reject) => { setTimeout(() => { if (this.chatWin.scrollTop === 0) {resolve(true);} this.chatWin.scroll(0, 0); resolve(false); reject(true); }, scrollAwaitTime) }) return pr } /** Iteration over number of scrolls */ async scroller(){ let confirmTop = 0 for (let i = 0; i <= this.msgDepth; i++){ if (await this.scroll()){ confirmTop++; i--; console.log('Page might been not loaded, confirming (' + confirmTop + '/' + scrollRetryAttempts + ')') } else { console.log("Scroll depth", i); confirmTop = 0 } if (confirmTop === scrollRetryAttempts) {console.log('Reached the top of conversation'); break;} } } /** Cheks validation of URL */ async checkUrl(){ const pr = new Promise((resolve, reject) =>{ setTimeout(() => { if ( !document.URL.includes('https://vk.com/im?sel=c') && !(forbiddenChats.includes(document.URL)) ) { resolve(true) reject(this.checkUrl()) } else { console.log("Group or personal chat encountered => skipping"); resolve(false) reject(this.checkUrl()) } }, 1000) }) return pr } /** Scraping each message body */ scrapeTxt(){ const elements = document.getElementsByClassName("im-mess-stack _im_mess_stack ") let count = 0 for (let elem of elements) { let txt = '' let txtNode = elem.getElementsByClassName("im-mess--text wall_module _im_log_body") txtNode.forEach(nod => { for (let childNode of nod.childNodes) { if (childNode.nodeName.includes('#text') && !(childNode.data === "")) { txt = txt + childNode.data.charAt(0).toUpperCase() + childNode.data.replaceAll('\n', '. ').slice(1) + '. ' } } }) if (!(txt === "" || elem.dataset.peer == 0)){ GM_setValue(this.msId, this.msId + separator + elem.dataset.peer + separator + txt.slice(0, -2)); this.msId += 1; count++; } } console.log('Extracted', count, 'messages') } /** Scrolling left window, containing all chats until desired number of chats were loaded */ async pplDepth(scrollLength){ const pr = new Promise((resolve, reject) => { setTimeout(() => { this.pplWin.scroll(0, scrollLength); const chatList = document.getElementsByClassName("nim-dialog"); resolve(chatList) reject(this.pplDepth()) }, 600) }) return pr } /** Main function */ async parse(){ let chatList = document.getElementsByClassName("nim-dialog"); let scrollLength = 0 while(chatList.length <= this.chatNum){ scrollLength += 1000 chatList = await this.pplDepth(scrollLength) } chatList = Array.from(chatList).slice(0, this.chatNum) for (let [index, chat] of chatList.slice(startingChat).entries()){ chat.click() let valid = await this.checkUrl() if (valid === true) { await this.scroller() .then(() => this.scrapeTxt()) } console.log("CHAT №", '(' + index + '/' + chatNum + ')') } } } function main(){ const m = new vkParser(chatNum, msgDepth) m.parse() } function getArray(){ let arrayOfKeys = GM_listValues() let csvArray = [] for (let i of arrayOfKeys) { if (!((GM_getValue(i) == 0) || (GM_getValue(i)) == "")){ csvArray.push([GM_getValue(i)]) } } return csvArray } function getCsv(){ const arr = getArray(); // let csvContent = "data:text/csv;charset=utf-8,index" + separator + "user_id" + separator + "text\n"; let csvContent = "index" + separator + "user_id" + separator + "text\n"; arr.forEach(function(rowArray) { let row = rowArray; csvContent += row + "\n"; }) console.log(arr.length) // var encodedUri = encodeURI(csvContent); var encodedUri = encodeURIComponent(csvContent) const dataUri = `data:text/csv;charset=utf-8,${encodedUri}`; // console.log(dataUri); window.open(dataUri); } const isExec = confirm('Execute parser?') if (isExec){ main() } else { const isCsv = confirm('Download parsed dataset?') if (isCsv){ getCsv() } } setTimeout(() => { const ad = document.getElementById("ads_left"); ad.remove(); }, 600)