NOTICE: By continued use of this site you understand and agree to the binding Terms of Service and Privacy Policy.
// ==UserScript==
// @name         股吧抓取工具
// @namespace    http://tampermonkey.net/
// @version      1.0
// @description  根据输入的目标股票代码和统计天数抓取股吧帖子数据,并在页面上显示实时进度和结果
// @author       YourName
// @license      MIT
// @match        https://guba.eastmoney.com/*
// @grant        none
// ==/UserScript==

/**
 * Userscript that counts posts per day on a guba.eastmoney.com stock board.
 *
 * It injects a small floating panel into the page, reads a stock code and a
 * number of days from it, then walks the board's list pages (newest first),
 * tallying how many posts fall on each of the last N calendar days. Progress
 * is shown live and the final per-day counts are rendered as a table.
 */
(function() {
  'use strict';

  // ---------------------------------------------------------------------
  // Floating control panel
  // ---------------------------------------------------------------------
  const container = document.createElement('div');
  container.id = 'tm-crawler-container';
  Object.assign(container.style, {
    position: 'fixed',
    top: '10px',
    right: '10px',
    zIndex: '9999',
    background: 'white',
    border: '1px solid #ccc',
    padding: '10px',
    maxWidth: '300px',
    fontSize: '14px',
    boxShadow: '0 0 10px rgba(0,0,0,0.3)'
  });
  // Static markup only: nothing user-controlled is interpolated here.
  container.innerHTML = `
    <h3 style="margin-top:0;">股吧抓取工具</h3>
    <label>目标股票代码: <input type="text" id="targetStockInput" value="002131"></label><br>
    <label>统计天数: <input type="number" id="daysToCheckInput" value="5" min="1"></label><br>
    <button id="startButton">开始抓取</button>
    <div id="progress-display" style="margin-top:10px; white-space: pre-wrap; min-height: 40px;"></div>
    <div id="result-display" style="margin-top:10px;"></div>
    <button id="closeButton" style="display:none; margin-top:10px;">关闭</button>
  `;
  document.body.appendChild(container);

  // Default configuration. targetStock and daysToCheck are overwritten from
  // the panel inputs each time a crawl starts.
  const config = {
    targetStock: '002131',
    daysToCheck: 5,
    retryCount: 3,          // total attempts per page
    minDelay: 8000,         // lower bound of the pause between requests (ms)
    maxDelay: 15000,        // upper bound of the pause between requests (ms)
    requestTimeout: 20000   // per-attempt timeout (ms)
  };

  // Panel elements used by the rest of the script.
  const progressDisplay = document.getElementById('progress-display');
  const resultDisplay = document.getElementById('result-display');
  const closeButton = document.getElementById('closeButton');
  const startButton = document.getElementById('startButton');
  const targetStockInput = document.getElementById('targetStockInput');
  const daysToCheckInput = document.getElementById('daysToCheckInput');

  // ---------------------------------------------------------------------
  // Small helpers
  // ---------------------------------------------------------------------

  /** Left-pads a number to two digits. */
  const pad2 = n => String(n).padStart(2, '0');

  /** Formats a Date as the 'MM-DD' key used throughout the script. */
  const formatMonthDay = date => `${pad2(date.getMonth() + 1)}-${pad2(date.getDate())}`;

  /** Returns a copy of `date` truncated to local midnight. */
  const startOfDay = date => new Date(date.getFullYear(), date.getMonth(), date.getDate());

  /**
   * Appends one paragraph to the result area.
   * Uses textContent so that error messages can never be parsed as HTML.
   *
   * @param {string} text - Message to show.
   * @param {string} [color] - Optional CSS color for the paragraph.
   */
  function appendMessage(text, color) {
    const paragraph = document.createElement('p');
    if (color) {
      paragraph.style.color = color;
    }
    paragraph.textContent = text;
    resultDisplay.appendChild(paragraph);
  }

  /**
   * Waits a random time between config.minDelay and config.maxDelay so that
   * consecutive requests are not sent at a fixed, bot-like rhythm.
   *
   * @returns {Promise<void>}
   */
  const antiBlockDelay = () => new Promise(resolve => {
    const span = config.maxDelay - config.minDelay;
    setTimeout(resolve, Math.random() * span + config.minDelay);
  });

  /**
   * Builds the list of 'MM-DD' keys for the last `days` calendar days,
   * oldest first and ending with `today`.
   *
   * @param {number} days - Number of days to cover (>= 1).
   * @param {Date} [today] - Reference day; defaults to the current date.
   * @returns {string[]} 'MM-DD' strings in ascending date order.
   */
  function generateDateRange(days, today = new Date()) {
    return Array.from({ length: days }, (_, i) => {
      // Constructing from y/m/d lets Date handle month and year roll-over.
      const day = new Date(today.getFullYear(), today.getMonth(), today.getDate() - i);
      return formatMonthDay(day);
    }).reverse();
  }

  /**
   * Converts the text of a post's date cell into a Date at local midnight.
   *
   * Accepted forms for the first whitespace-separated token:
   * 'MM-DD', 'MMDD' and 'YYYY-MM-DD'. When the year is absent it is inferred:
   * the current year is used unless that would put the post more than one
   * day in the future (posts cannot be from the future; the one-day slack
   * tolerates a time-zone difference between the site and the local clock),
   * in which case the previous year is used.
   *
   * @param {string} rawText - Text content of the date element.
   * @param {Date} today - Local midnight of the current day.
   * @returns {Date|null} The resolved date, or null if the text is not a date.
   */
  function resolvePostDate(rawText, today) {
    const token = rawText.trim().split(/\s+/)[0];
    let parts = token.includes('-')
      ? token.split('-')
      : [token.slice(0, 2), token.slice(2)];

    let explicitYear = null;
    if (parts.length === 3) {
      if (!/^\d{4}$/.test(parts[0])) {
        return null;
      }
      explicitYear = Number.parseInt(parts[0], 10);
      parts = parts.slice(1);
    }
    if (parts.length !== 2 || !parts.every(part => /^\d{1,2}$/.test(part))) {
      return null;
    }

    const month = Number.parseInt(parts[0], 10);
    const day = Number.parseInt(parts[1], 10);

    // Returns null when the day does not exist in that month/year
    // (Date would otherwise silently roll over, e.g. 02-30 -> 03-02).
    const build = year => {
      const candidate = new Date(year, month - 1, day);
      const isExact = candidate.getMonth() === month - 1 && candidate.getDate() === day;
      return isExact ? candidate : null;
    };

    if (explicitYear !== null) {
      return build(explicitYear);
    }

    const latestPlausible = new Date(today.getFullYear(), today.getMonth(), today.getDate() + 1);
    const inCurrentYear = build(today.getFullYear());
    if (inCurrentYear && inCurrentYear <= latestPlausible) {
      return inCurrentYear;
    }
    return build(today.getFullYear() - 1);
  }

  /**
   * Fetches a URL as text with a per-attempt timeout and retries.
   *
   * Each attempt gets its own AbortController and timer; sharing one across
   * attempts would make every retry after a timeout fail instantly on the
   * already-aborted signal. The timer is always cleared in `finally` so it
   * cannot fire during a later attempt.
   *
   * @param {string} url - Address to request.
   * @returns {Promise<string>} Response body.
   * @throws {Error} The last failure once all attempts are exhausted
   *   (timeout, network error, or non-2xx status).
   */
  async function enhancedFetch(url) {
    const attempts = Math.max(1, config.retryCount);
    let lastError;

    for (let attempt = 1; attempt <= attempts; attempt++) {
      const controller = new AbortController();
      let timedOut = false;
      const timeoutId = setTimeout(() => {
        timedOut = true;
        controller.abort();
      }, config.requestTimeout);

      try {
        const response = await fetch(url, {
          // 'Referer' is a forbidden header name and is dropped when set via
          // `headers`; the `referrer` option is the supported equivalent.
          // The browser's own User-Agent is sent automatically.
          referrer: location.href,
          credentials: 'include',
          signal: controller.signal
        });
        if (!response.ok) {
          throw new Error(`HTTP ${response.status}`);
        }
        // Read the body while the timeout is still armed so a stalled
        // download is aborted as well.
        return await response.text();
      } catch (err) {
        lastError = timedOut ? new Error(`请求超时 (${config.requestTimeout}ms)`) : err;
      } finally {
        clearTimeout(timeoutId);
      }

      if (attempt < attempts) {
        await antiBlockDelay();
      }
    }

    throw lastError;
  }

  /**
   * Walks the board's list pages and counts posts per day.
   *
   * Stops when a page fails to load, contains no dated posts, or when the
   * newest post on a page is older than the first day of the range.
   * Errors are reported in the result area rather than thrown.
   *
   * @returns {Promise<Object<string, number>>} Map of 'MM-DD' -> post count,
   *   with one key per day in the range (oldest first).
   */
  async function crawlPosts() {
    const today = startOfDay(new Date());
    const dateList = generateDateRange(config.daysToCheck, today);
    const wantedDays = new Set(dateList);
    const countMap = Object.fromEntries(dateList.map(d => [d, 0]));

    // First (oldest) day of the range, as a real date so that ranges that
    // cross a year boundary compare correctly.
    const cutoffDate = new Date(
      today.getFullYear(),
      today.getMonth(),
      today.getDate() - (config.daysToCheck - 1)
    );

    let currentPage = 1;
    let latestDate = '';
    const startTime = Date.now();
    const dateRange = `${dateList[0]} 至 ${dateList[dateList.length - 1]}`;

    // Live progress line, refreshed every 1.5 seconds.
    const progressUpdater = setInterval(() => {
      const elapsed = Math.floor((Date.now() - startTime) / 1000);
      const currentTime = new Date().toLocaleTimeString();
      progressDisplay.textContent = `[${currentTime}] 正在统计 ${dateRange} 第${currentPage}页 | 已运行: ${elapsed}s${latestDate ? ' | 最新日期: ' + latestDate : ''}`;
    }, 1500);

    // encodeURIComponent keeps user input from altering the URL structure.
    const stockCode = encodeURIComponent(config.targetStock);
    const parser = new DOMParser();

    try {
      for (;;) {
        const pageSuffix = currentPage > 1 ? '_' + currentPage : '';
        const pageUrl = `https://guba.eastmoney.com/list,${stockCode},f${pageSuffix}.html`;

        let html;
        try {
          html = await enhancedFetch(pageUrl);
        } catch (err) {
          appendMessage(`❌ 第${currentPage}页抓取失败: ${err.message}`, 'red');
          break;
        }

        const doc = parser.parseFromString(html, 'text/html');

        // Tally every dated post on the page and track the newest one.
        let newestOnPage = null;
        doc.querySelectorAll('.update').forEach(el => {
          const postDate = resolvePostDate(el.textContent, today);
          if (postDate === null) {
            // Not a date (e.g. a header cell); ignoring it keeps the
            // early-stop comparison below from being poisoned by NaN.
            return;
          }
          const key = formatMonthDay(postDate);
          latestDate = key;
          if (newestOnPage === null || postDate > newestOnPage) {
            newestOnPage = postDate;
          }
          if (postDate >= cutoffDate && wantedDays.has(key)) {
            countMap[key]++;
          }
        });

        // A page without any dated post means we ran past the last page.
        if (newestOnPage === null) {
          break;
        }

        // Using the newest post (not the oldest) keeps old pinned posts on
        // a page from stopping the crawl too early.
        if (newestOnPage < cutoffDate) {
          appendMessage(`✅ 提前终止: ${newestOnPage.toLocaleDateString()} 早于截止日期 ${cutoffDate.toLocaleDateString()}`);
          break;
        }

        currentPage++;
        // Only pause when another request will actually follow.
        await antiBlockDelay();
      }
    } catch (err) {
      appendMessage(`❌ 抓取过程中发生错误: ${err.message}`, 'red');
    } finally {
      clearInterval(progressUpdater);
    }

    return countMap;
  }

  /**
   * Renders the per-day counts as a two-column table in the result area.
   *
   * @param {Object<string, number>} results - Map of 'MM-DD' -> post count.
   */
  function renderResultTable(results) {
    const table = document.createElement('table');
    table.setAttribute('border', '1');
    table.style.cssText = 'border-collapse: collapse; width: 100%;';

    const headerRow = table.insertRow();
    ['日期', '帖子数量'].forEach(title => {
      const th = document.createElement('th');
      th.textContent = title;
      headerRow.appendChild(th);
    });

    for (const [date, count] of Object.entries(results)) {
      const row = table.insertRow();
      row.insertCell().textContent = date;
      row.insertCell().textContent = String(count);
    }

    resultDisplay.appendChild(table);
  }

  /**
   * Reads the panel inputs, runs the crawl and shows the result table.
   * Invalid input falls back to the previously configured value.
   *
   * @returns {Promise<void>}
   */
  async function startCrawling() {
    config.targetStock = targetStockInput.value.trim() || config.targetStock;

    // Reject NaN, zero and negative values: an empty date range would make
    // the crawl fail before it starts.
    const requestedDays = Number.parseInt(daysToCheckInput.value, 10);
    if (Number.isInteger(requestedDays) && requestedDays >= 1) {
      config.daysToCheck = requestedDays;
    }

    // Reset output from any previous run.
    progressDisplay.textContent = '开始抓取...';
    resultDisplay.textContent = '';

    try {
      const results = await crawlPosts();
      renderResultTable(results);
    } catch (err) {
      appendMessage(`❌ 错误: ${err.message}`, 'red');
    } finally {
      closeButton.style.display = 'block';
    }
  }

  // Start button: disabled while a crawl is running to prevent overlap.
  startButton.addEventListener('click', () => {
    startButton.disabled = true;
    startCrawling().finally(() => {
      startButton.disabled = false;
    });
  });

  // Close button: hides the panel.
  closeButton.addEventListener('click', () => {
    container.style.display = 'none';
  });
})();