NOTICE: By continuing to use this site, you understand and agree to the binding Terms of Service and Privacy Policy.
// ==UserScript== // @name Taobao Subway Crawler // @version 0.2.3 // @author zjh1943 // @description This userscript can crawl taobao subway campaign data every one hour. // @match *.taobao.com/* // @homePage https://github.com/zjh1943/crawler-userscript // @updateURL https://openuserjs.org/meta/zjh1943/My_Script.meta.js // @license GPL-3.0-or-later; http://www.gnu.org/licenses/gpl-3.0.txt // @copyright 2020, zjh1943 // @run-at document-idle // @require https://openuserjs.org/src/libs/sizzle/GM_config.js // @require https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.slim.min.js // @require https://gmousse.github.io/dataframe-js/dist/dataframe.min.js // @require https://cdnjs.cloudflare.com/ajax/libs/dexie/2.0.4/dexie.min.js // @require https://cdnjs.cloudflare.com/ajax/libs/moment.js/2.24.0/moment.min.js // @require https://cdnjs.cloudflare.com/ajax/libs/xlsx/0.15.5/xlsx.full.min.js // @require https://unpkg.com/later2@2.0.1/later.min.js // @grant unsafeWindow // @grant GM_xmlhttpRequest // @grant GM_setClipboard // @grant GM_setValue // @grant GM_getValue // @grant GM_deleteValue // @grant GM_openInTab // @grant GM_registerMenuCommand // @grant GM_unregisterMenuCommand // ==/UserScript== (function(){function r(e,n,t){function o(i,f){if(!n[i]){if(!e[i]){var c="function"==typeof require&&require;if(!f&&c)return c(i,!0);if(u)return u(i,!0);var a=new Error("Cannot find module '"+i+"'");throw a.code="MODULE_NOT_FOUND",a}var p=n[i]={exports:{}};e[i][0].call(p.exports,function(r){var n=e[i][1][r];return o(n||r)},p,p.exports,r,e,n,t)}return n[i].exports}for(var u="function"==typeof require&&require,i=0;i<t.length;i++)o(t[i]);return o}return r})()({1:[function(require,module,exports){ "use strict"; function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { Promise.resolve(value).then(_next, _throw); } } 
function _asyncToGenerator(fn) { return function () { var self = this, args = arguments; return new Promise(function (resolve, reject) { var gen = fn.apply(self, args); function _next(value) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value); } function _throw(err) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", err); } _next(undefined); }); }; } function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } function _defineProperty(obj, key, value) { if (key in obj) { Object.defineProperty(obj, key, { value: value, enumerable: true, configurable: true, writable: true }); } else { obj[key] = value; } return obj; } var DataFrame = dfjs.DataFrame; var _require = require('./helper'), createUrlGetter = _require.createUrlGetter, extractDataAndSimplify = _require.extractDataAndSimplify, simplifyText = _require.simplifyText, extractDataFromTable = _require.extractDataFromTable, getParameterFromUrl = _require.getParameterFromUrl; var anchorFilter = function anchorFilter(ele) { /** 暂停状态,不抓取 */ return $(ele).closest('tr').find('td span strong:contains("暂停")').length <= 0; }; var newUrlsGetter = createUrlGetter('a.ad-title', anchorFilter); var AdgroupsPage = function AdgroupsPage() { var _this = this; _classCallCheck(this, AdgroupsPage); _defineProperty(this, "id", 'Adgroups'); _defineProperty(this, "triggerOnUrl", function (url) { return !!url && !!url.match(/(https:\/\/subway.simba.taobao.com)?\/?(#\!\/manage\/campaign\/detail)(.*)/); }); _defineProperty(this, "getUrlsToAdd", function () { return _this.findNewUrl ? 
newUrlsGetter() : []; }); _defineProperty(this, "isPageReady", function () { return $('a.ad-title').length > 0 && $('#bp-scroll-table tr th').length > 0; }); _defineProperty(this, "onPageReady", /*#__PURE__*/ function () { var _ref = _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee(fetchSN) { var dataFrame; return regeneratorRuntime.wrap(function _callee$(_context) { while (1) { switch (_context.prev = _context.next) { case 0: dataFrame = _this.parseData(fetchSN); _context.next = 3; return _this.onDataFrameReady(dataFrame); case 3: case "end": return _context.stop(); } } }, _callee); })); return function (_x) { return _ref.apply(this, arguments); }; }()); _defineProperty(this, "parseData", function (fetchSN) { var head = extractDataAndSimplify('#bp-scroll-table', 'tr', 'th'); var columns = head[0].map(function (v) { if (v.startsWith('状态')) return '状态';else if (v.startsWith('营销场景')) return '营销场景';else return v; }); var dataExtractor = function dataExtractor(ele) { var text = ''; if ($(ele).find('.ad-title').length > 0) { text = $(ele).find('.ad-title').text(); } else { text = $(ele).text(); } return simplifyText(text); }; var data = extractDataFromTable('table.bp-table[bx-name="table"]', 'tr', 'td', dataExtractor); var dataFrame = new DataFrame(data, columns); dataFrame = dataFrame.restructure(columns.filter(function (col) { return !!col; })); var urls = $.map($('a.ad-title'), function (value) { return $(value).attr('href'); }); var campaignIds = urls.map(function (v) { return getParameterFromUrl(v, 'campaignId'); }); dataFrame = dataFrame.withColumn('推广计划ID', function (_, index) { return campaignIds[index]; }); var adgroupIds = urls.map(function (v) { return getParameterFromUrl(v, 'adGroupId'); }); dataFrame = dataFrame.withColumn('推广单元ID', function (_, index) { return adgroupIds[index]; }); // const productIds = urls.map(v => getParameterFromUrl(v, 'productId')); // dataFrame = dataFrame.withColumn('宝贝ID', (_, index) => productIds[index]); 
var timeStr = moment().format('YYYY-MM-DD HH:mm:ss'); dataFrame = dataFrame.withColumn('抓取时间', function () { return timeStr; }); dataFrame = dataFrame.withColumn('Fetch SN', function () { return fetchSN; }); var shopName = $('span.header-nickname-inside:nth-of-type(1)').text(); dataFrame = dataFrame.withColumn('店铺名称', function () { return shopName; }); dataFrame.show(); return dataFrame; }); _defineProperty(this, "saveData", /*#__PURE__*/ function () { var _ref2 = _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee2(dataFrame) { var _require2, db; return regeneratorRuntime.wrap(function _callee2$(_context2) { while (1) { switch (_context2.prev = _context2.next) { case 0: _require2 = require('./db'), db = _require2.db; _context2.next = 3; return db['adgroups_log'].bulkPut(dataFrame.toCollection()); case 3: _context2.next = 5; return db['headers'].put({ table_name: 'adgroups_log', 'columns': dataFrame.listColumns() }); case 5: case "end": return _context2.stop(); } } }, _callee2); })); return function (_x2) { return _ref2.apply(this, arguments); }; }()); this.findNewUrl = true; this.onDataFrameReady = this.saveData; }; ; module.exports = AdgroupsPage; },{"./db":7,"./helper":8}],2:[function(require,module,exports){ "use strict"; function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { Promise.resolve(value).then(_next, _throw); } } function _asyncToGenerator(fn) { return function () { var self = this, args = arguments; return new Promise(function (resolve, reject) { var gen = fn.apply(self, args); function _next(value) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value); } function _throw(err) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", err); } _next(undefined); }); }; } function _classCallCheck(instance, Constructor) { if (!(instance instanceof 
Constructor)) { throw new TypeError("Cannot call a class as a function"); } } function _defineProperty(obj, key, value) { if (key in obj) { Object.defineProperty(obj, key, { value: value, enumerable: true, configurable: true, writable: true }); } else { obj[key] = value; } return obj; } var DataFrame = dfjs.DataFrame; var _require = require('./helper'), createUrlGetter = _require.createUrlGetter, extractDataAndSimplify = _require.extractDataAndSimplify, concat2DArray = _require.concat2DArray, getParameterFromUrl = _require.getParameterFromUrl; var log = require('./logger'); var anchorFilter = function anchorFilter(ele) { /** 暂停状态,不抓取 */ return $(ele).closest('tr').find('span.status-0').length <= 0; }; var newUrlsGetter = createUrlGetter('.manage-common-table-container div.editor-content a', anchorFilter); var CampaignsPage = function CampaignsPage() { var _this = this; _classCallCheck(this, CampaignsPage); _defineProperty(this, "id", 'Campaigns'); _defineProperty(this, "onPageReady", /*#__PURE__*/ function () { var _ref = _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee(fetchSN) { var dataFrame; return regeneratorRuntime.wrap(function _callee$(_context) { while (1) { switch (_context.prev = _context.next) { case 0: dataFrame = _this.parseData(fetchSN); _context.next = 3; return _this.onDataFrameReady(dataFrame); case 3: case "end": return _context.stop(); } } }, _callee); })); return function (_x) { return _ref.apply(this, arguments); }; }()); _defineProperty(this, "triggerOnUrl", function (url) { return !!url && !!url.match(/(https:\/\/subway.simba.taobao.com)?\/?(#\!\/manage\/campaign\/index)(.*)/); }); _defineProperty(this, "getUrlsToAdd", function () { return _this.findNewUrl ? 
newUrlsGetter() : []; }); _defineProperty(this, "isPageReady", function () { var ret = $('.manage-common-table-container div.editor-content a').length > 0; log.debug('isPageReady:', ret); return ret; }); _defineProperty(this, "saveData", /*#__PURE__*/ function () { var _ref2 = _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee2(dataFrame) { var _require2, db; return regeneratorRuntime.wrap(function _callee2$(_context2) { while (1) { switch (_context2.prev = _context2.next) { case 0: _require2 = require('./db'), db = _require2.db; _context2.next = 3; return db['campaigns_log'].bulkPut(dataFrame.toCollection()); case 3: _context2.next = 5; return db['headers'].put({ table_name: 'campaigns_log', 'columns': dataFrame.listColumns() }); case 5: case "end": return _context2.stop(); } } }, _callee2); })); return function (_x2) { return _ref2.apply(this, arguments); }; }()); _defineProperty(this, "parseData", function (fetchSN) { var leftHead = extractDataAndSimplify('table[left="true"] thead', 'tr', 'th'); var leftData = extractDataAndSimplify('table[left="true"] tbody', 'tr[mxv]'); var rightHead = extractDataAndSimplify('table[center="true"] thead', 'tr', 'th'); var rightData = extractDataAndSimplify('table[center="true"] tbody', 'tr:not(.operation-tr):not(:last-of-type)', 'td'); var columns = concat2DArray(leftHead, rightHead)[0]; var data = concat2DArray(leftData, rightData); var dataFrame = new DataFrame(data, columns); dataFrame = dataFrame.restructure(columns.filter(function (col) { return !!col; })); var urls = $.map($('table[left="true"] tbody tr[mxv] .editor a'), function (value) { return $(value).attr('href'); }); var campaignIds = urls.map(function (v) { return getParameterFromUrl(v, 'campaignId'); }); dataFrame = dataFrame.withColumn('推广计划ID', function (_, index) { return campaignIds[index]; }); var timeStr = moment().format('YYYY-MM-DD HH:mm:ss'); dataFrame = dataFrame.withColumn('抓取时间', function () { return timeStr; }); dataFrame = 
dataFrame.withColumn('Fetch SN', function () { return fetchSN; }); var shopName = $('span.header-nickname-inside:nth-of-type(1)').text(); dataFrame = dataFrame.withColumn('店铺名称', function () { return shopName; }); dataFrame.show(); return dataFrame; }); this.findNewUrl = true; this.onDataFrameReady = this.saveData; }; module.exports = CampaignsPage; },{"./db":7,"./helper":8,"./logger":10}],3:[function(require,module,exports){ "use strict"; function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { Promise.resolve(value).then(_next, _throw); } } function _asyncToGenerator(fn) { return function () { var self = this, args = arguments; return new Promise(function (resolve, reject) { var gen = fn.apply(self, args); function _next(value) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value); } function _throw(err) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", err); } _next(undefined); }); }; } function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } function _defineProperty(obj, key, value) { if (key in obj) { Object.defineProperty(obj, key, { value: value, enumerable: true, configurable: true, writable: true }); } else { obj[key] = value; } return obj; } var DataFrame = dfjs.DataFrame; var log = require('./logger'); var _require = require('./helper'), concat2DArray = _require.concat2DArray, simplifyText = _require.simplifyText, extractDataFromTable = _require.extractDataFromTable, getParameterFromUrl = _require.getParameterFromUrl; var KeywordsPage = function KeywordsPage() { var _this = this; _classCallCheck(this, KeywordsPage); _defineProperty(this, "id", 'Keywords'); _defineProperty(this, "onPageReady", /*#__PURE__*/ function () { var _ref = _asyncToGenerator( /*#__PURE__*/ 
regeneratorRuntime.mark(function _callee(fetchSN) { var dataFrame; return regeneratorRuntime.wrap(function _callee$(_context) { while (1) { switch (_context.prev = _context.next) { case 0: dataFrame = _this.parseData(fetchSN); _context.next = 3; return _this.onDataFrameReady(dataFrame); case 3: case "end": return _context.stop(); } } }, _callee); })); return function (_x) { return _ref.apply(this, arguments); }; }()); _defineProperty(this, "triggerOnUrl", function (url) { return !!url && !!url.match(/(https:\/\/subway.simba.taobao.com)?\/?(#\!\/manage\/adgroup\/detail)(.*)/); }); _defineProperty(this, "getUrlsToAdd", function () { return []; }); _defineProperty(this, "isPageReady", function () { return $('.table-td .bp-table tr').length > 0; }); _defineProperty(this, "saveData", /*#__PURE__*/ function () { var _ref2 = _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee2(dataFrame) { var _require2, db; return regeneratorRuntime.wrap(function _callee2$(_context2) { while (1) { switch (_context2.prev = _context2.next) { case 0: _require2 = require('./db'), db = _require2.db; _context2.next = 3; return db['keywords_log'].bulkPut(dataFrame.toCollection()); case 3: _context2.next = 5; return db['headers'].put({ table_name: 'keywords_log', 'columns': dataFrame.listColumns() }); case 5: case "end": return _context2.stop(); } } }, _callee2); })); return function (_x2) { return _ref2.apply(this, arguments); }; }()); _defineProperty(this, "parseData", function (fetchSN) { var REMOVE_SIGN = '%TO_REMOVE%'; var leftColumns = ["".concat(REMOVE_SIGN, "_1"), '状态', "".concat(REMOVE_SIGN, "_2"), '关键词']; var leftData = extractDataFromTable('.freeze-td table.bp-table', 'tr', 'td'); var hasSmartLibrary = false; if (leftData.length > 0) { var row = leftData[0]; var result = row[1].match(/(.*)(流量智选词包)/); if (result) { hasSmartLibrary = true; row[1] = result[1]; row[3] = result[2]; } } var rightHead = extractDataFromTable('table.bp-table.scroll-th', 'tr', 'th'); // 
log.debug('rightHead:', rightHead); var rightColumns = rightHead[0]; var columnsToReplace = ['质量分(PC)', '质量分(移动)', '排名(PC)', '排名(移动)', '出价(PC)', '出价(移动)']; rightColumns.splice.apply(rightColumns, [0, columnsToReplace.length].concat(columnsToReplace)); var rightData = extractDataFromTable('.table-td .bp-table', 'tr', 'td'); var columns = leftColumns.concat(rightColumns); var data = concat2DArray(leftData, rightData); var dataFrame = new DataFrame(data, columns); dataFrame = dataFrame.restructure(columns.filter(function (col) { return !col.includes(REMOVE_SIGN); })); dataFrame = dataFrame.withColumn('关键词', function (row) { return row.get('关键词').replace(/查看历史报表关键词全景图$/, ''); }); var campaignId = getParameterFromUrl(location.href, 'campaignId'); dataFrame = dataFrame.withColumn('推广计划ID', function () { return campaignId; }); var adgroupID = getParameterFromUrl(location.href, 'adGroupId'); dataFrame = dataFrame.withColumn('推广单元ID', function () { return adgroupID; }); var productUrl = $('article.box > a.imgcn80').attr('href'); var productId = getParameterFromUrl(productUrl, 'id'); dataFrame = dataFrame.withColumn('宝贝ID', function () { return productId; }); var timeStr = moment().format('YYYY-MM-DD HH:mm:ss'); dataFrame = dataFrame.withColumn('抓取时间', function () { return timeStr; }); dataFrame = dataFrame.withColumn('Fetch SN', function () { return fetchSN; }); var shopName = $('span.header-nickname-inside:nth-of-type(1)').text(); dataFrame = dataFrame.withColumn('店铺名称', function () { return shopName; }); dataFrame.show(); return dataFrame; }); this.findNewUrl = true; this.onDataFrameReady = this.saveData; }; module.exports = KeywordsPage; },{"./db":7,"./helper":8,"./logger":10}],4:[function(require,module,exports){ "use strict"; function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { 
Promise.resolve(value).then(_next, _throw); } } function _asyncToGenerator(fn) { return function () { var self = this, args = arguments; return new Promise(function (resolve, reject) { var gen = fn.apply(self, args); function _next(value) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value); } function _throw(err) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", err); } _next(undefined); }); }; } function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } function _defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } function _createClass(Constructor, protoProps, staticProps) { if (protoProps) _defineProperties(Constructor.prototype, protoProps); if (staticProps) _defineProperties(Constructor, staticProps); return Constructor; } function _defineProperty(obj, key, value) { if (key in obj) { Object.defineProperty(obj, key, { value: value, enumerable: true, configurable: true, writable: true }); } else { obj[key] = value; } return obj; } var _require = require('./retry'), check = _require.check, retry = _require.retry, waitUntil = _require.waitUntil; var log = require('./logger'); var Page = /*#__PURE__*/ function () { function Page() { _classCallCheck(this, Page); _defineProperty(this, "id", ''); } _createClass(Page, [{ key: "triggerOnUrl", value: function triggerOnUrl(url) { return false; } }, { key: "isPageReady", value: function isPageReady() { return false; } }, { key: "onPageReady", value: function onPageReady() { return true; } }, { key: "getUrlsToAdd", value: function getUrlsToAdd() { return []; } }]); return Page; }(); /** * @typedef {Object} ResourceOption * @property {string} 
id * @property {function} triggerOnUrl 当前配置适用于哪个 URL,`(url) => boolean` * @property {function} isPageReady 网页是否加载完全,`(document) => boolean` * @property {function} onPageReady 网页已经加载成功,需要在这里处理数据,或者执行某些操作。`(document) => boolean` * @property {function} getUrlsToAdd 当前网页有哪些链接该加入队列 `(document) => string[]` * * @typedef {Object} LoginOption * @property {string} loginPageURL * @property {function} needLogin 根据当前网页内容决定是否需要重新登录。`(document) => boolean` * @property {function} isLoginPageReady 登陆页面是否已经加载成功。`(document) => boolean` * @property {function} isLoginSuccess 登陆是否成功。`(document) => boolean` * @property {function} doLogin 执行登陆。`(document) => Promise` */ /** * @typedef {Object} CrawlerOption * @property {string} startPageURL * @property {function} gotoUrl `(url) => Promise` * @property {ResourceOption[]} pageList 资源列表 * @property {LoginOption} [login] 登陆配置,如果不需要登陆,可以不设置 * @property {number} [maxWait=10000] 加载网页时最长加载时间,单位:ms, * @property {number} [retryCount=3] 网页加载未成功时,最多重试次数, * @property {number} [operateInterval=1000] 每次操作间隔时间 * @property {function} [onCrawlComplete] `() => void` * @property {function} [onPageStart] `() => void` * @property {function} [onPageComplete] `() => void` * @property {number} [maxWait=8000] 每次加载完网页后停留时间 * @property {number} [minWait=3000] 每次加载完网页后停留时间 */ /** * 爬虫调度器 */ var Crawler = /** * @type {CrawlerOption} */ // 每启动一次 crawler,将分配一个新的 SN,作为此次抓取的唯一标识符。 // 将要抓取的 URL // 正在抓取的 URL /** * @param {CrawlerOption} options */ function Crawler(options) { var _this = this; _classCallCheck(this, Crawler); _defineProperty(this, "options", _defineProperty({ startPageURL: '', pageList: [], maxWait: 10000, retryCount: 3, operateInterval: 1000, minWait: 3000 }, "maxWait", 8000)); _defineProperty(this, "fetchSN", new Date().getTime()); _defineProperty(this, "urlList", []); _defineProperty(this, "currUrl", null); _defineProperty(this, "crawledUrlSet", new Set()); _defineProperty(this, "isCrawling", false); _defineProperty(this, "isPause", false); 
_defineProperty(this, "isToBeClear", false); _defineProperty(this, "clear", function () { _this.urlList = []; _this.crawledUrlSet.clear(); _this.isPause = false; _this.isCrawling = false; _this.isToBeClear = true; }); _defineProperty(this, "start", /*#__PURE__*/ _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee() { return regeneratorRuntime.wrap(function _callee$(_context) { while (1) { switch (_context.prev = _context.next) { case 0: log.debug('_start', _this.urlList); _this.urlList = [_this.options.startPageURL]; _this.isPause = false; _this.isToBeClear = false; _this.isCrawling = true; _this.fetchSN = new Date().getTime(); return _context.abrupt("return", _this._start()); case 7: case "end": return _context.stop(); } } }, _callee); }))); _defineProperty(this, "restoreFromSavedState", /*#__PURE__*/ function () { var _ref2 = _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee2(urlList, crawledUrlSet) { return regeneratorRuntime.wrap(function _callee2$(_context2) { while (1) { switch (_context2.prev = _context2.next) { case 0: _this.urlList = urlList; _this.crawledUrlSet = crawledUrlSet; _this.isCrawling = true; _this.isPause = false; _this.isToBeClear = false; _this.fetchSN = new Date().getTime(); return _context2.abrupt("return", _this._start()); case 7: case "end": return _context2.stop(); } } }, _callee2); })); return function (_x, _x2) { return _ref2.apply(this, arguments); }; }()); _defineProperty(this, "pause", function () { log.debug('_pause: this.urlList:', _this.urlList); _this.isPause = true; }); _defineProperty(this, "resume", /*#__PURE__*/ _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee3() { return regeneratorRuntime.wrap(function _callee3$(_context3) { while (1) { switch (_context3.prev = _context3.next) { case 0: log.debug('_resume: this.urlList:', _this.urlList); _this.isPause = false; return _context3.abrupt("return", _this._start()); case 3: case "end": return 
_context3.stop(); } } }, _callee3); }))); _defineProperty(this, "_start", /*#__PURE__*/ _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee4() { var _this$options, minWait, maxWait, onPageStart, onPageComplete, onCrawlComplete, timeToWait, url; return regeneratorRuntime.wrap(function _callee4$(_context4) { while (1) { switch (_context4.prev = _context4.next) { case 0: log.debug('_start: begin. this.urlList:', _this.urlList); _this$options = _this.options, minWait = _this$options.minWait, maxWait = _this$options.maxWait, onPageStart = _this$options.onPageStart, onPageComplete = _this$options.onPageComplete, onCrawlComplete = _this$options.onCrawlComplete; timeToWait = Math.floor(Math.random() * (maxWait - minWait)) + minWait; if (!(_this.urlList.length > 0)) { _context4.next = 11; break; } url = _this.urlList.splice(0, 1)[0]; _this.currUrl = url; _this.crawledUrlSet.add(url); if (onPageStart) onPageStart(url); return _context4.abrupt("return", _this._crawlPage(url)["catch"](function (reason) { /** 如果中间失败了,还是继续下一波,不要影响下一条任务 */ log.error('_start: crawl page fail:', url, ', reason:', reason); }).then(function () { return new Promise(function (resolve, _) { _this.currUrl = null; if (onPageComplete) onPageComplete(url); log.debug('_stat: wait time = ', timeToWait); setTimeout(resolve, timeToWait); }); }).then(function () { if (_this.isToBeClear) { if (onCrawlComplete) onCrawlComplete(); return Promise.reject(Crawler.QUIT_REASON_CLEAR); } else if (_this.isPause) { return Promise.reject(Crawler.QUIT_REASON_PAUSE); } else { return _this._start(); } })); case 11: _this.isCrawling = false; if (onCrawlComplete) onCrawlComplete(); case 13: case "end": return _context4.stop(); } } }, _callee4); }))); _defineProperty(this, "_openPageOnce", /*#__PURE__*/ function () { var _ref5 = _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee5(url, isPageReady) { return regeneratorRuntime.wrap(function _callee5$(_context5) { while (1) { switch 
(_context5.prev = _context5.next) { case 0: return _context5.abrupt("return", _this.options.gotoUrl(url).then(function () { return waitUntil(isPageReady, _this.options.maxWait); })); case 1: case "end": return _context5.stop(); } } }, _callee5); })); return function (_x3, _x4) { return _ref5.apply(this, arguments); }; }()); _defineProperty(this, "_runFunctionAndLoginIfNeed", /*#__PURE__*/ function () { var _ref6 = _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee6(fn) { var _len, args, _key, login, needLogin, _args6 = arguments; return regeneratorRuntime.wrap(function _callee6$(_context6) { while (1) { switch (_context6.prev = _context6.next) { case 0: for (_len = _args6.length, args = new Array(_len > 1 ? _len - 1 : 0), _key = 1; _key < _len; _key++) { args[_key - 1] = _args6[_key]; } login = _this.options.login; if (login) { _context6.next = 4; break; } return _context6.abrupt("return", fn.apply(void 0, args)); case 4: needLogin = login.needLogin; return _context6.abrupt("return", fn.apply(void 0, args)["catch"](function () { if (needLogin()) { return _this.login().then(function () { return fn.apply(void 0, args); }); } else { return Promise.reject(); } })); case 6: case "end": return _context6.stop(); } } }, _callee6); })); return function (_x5) { return _ref6.apply(this, arguments); }; }()); _defineProperty(this, "_openPageAndLoginIfNeed", /*#__PURE__*/ function () { var _ref7 = _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee8(url) { var pageList, page, isPageReady, onPageReady, getUrlsToAdd; return regeneratorRuntime.wrap(function _callee8$(_context8) { while (1) { switch (_context8.prev = _context8.next) { case 0: // log.debug('_openPageAndLoginIfNeed: ', url) pageList = _this.options.pageList; page = pageList.find(function (r) { return r.triggerOnUrl(url); }); isPageReady = page.isPageReady, onPageReady = page.onPageReady, getUrlsToAdd = page.getUrlsToAdd; return _context8.abrupt("return", 
_this._runFunctionAndLoginIfNeed(_this._openPageOnce, url, isPageReady).then( /*#__PURE__*/ _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee7() { var newUrls; return regeneratorRuntime.wrap(function _callee7$(_context7) { while (1) { switch (_context7.prev = _context7.next) { case 0: _context7.next = 2; return onPageReady(_this.fetchSN); case 2: newUrls = getUrlsToAdd(); newUrls = newUrls.filter(function (u) { return !_this.crawledUrlSet.has(u); }); log.debug('_openPageAndLoginIfNeed. newUrls:', newUrls); _this.urlList = _this.urlList.concat(newUrls); case 6: case "end": return _context7.stop(); } } }, _callee7); })))); case 4: case "end": return _context8.stop(); } } }, _callee8); })); return function (_x6) { return _ref7.apply(this, arguments); }; }()); _defineProperty(this, "_crawlPage", /*#__PURE__*/ function () { var _ref9 = _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee9(url) { var retryCount; return regeneratorRuntime.wrap(function _callee9$(_context9) { while (1) { switch (_context9.prev = _context9.next) { case 0: log.debug('_crawlPage: ', url); retryCount = _this.options.retryCount; return _context9.abrupt("return", retry(function () { return _this._openPageAndLoginIfNeed(url); }, retryCount)); case 3: case "end": return _context9.stop(); } } }, _callee9); })); return function (_x7) { return _ref9.apply(this, arguments); }; }()); _defineProperty(this, "login", /*#__PURE__*/ _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee10() { var _this$options2, login, maxWait, loginPageURL, isLoginPageReady, isLoginSuccess, doLogin, p; return regeneratorRuntime.wrap(function _callee10$(_context10) { while (1) { switch (_context10.prev = _context10.next) { case 0: log.debug('login'); _this$options2 = _this.options, login = _this$options2.login, maxWait = _this$options2.maxWait; if (login) { _context10.next = 6; break; } return _context10.abrupt("return"); case 6: loginPageURL = 
login.loginPageURL, isLoginPageReady = login.isLoginPageReady, isLoginSuccess = login.isLoginSuccess, doLogin = login.doLogin; p = null; if (isLoginPageReady()) { log.debug('login: isLoginPageReady.'); p = doLogin(); } else { log.debug('login: needReload'); p = _this._openPageOnce(loginPageURL, isLoginPageReady).then(doLogin); } return _context10.abrupt("return", p.then(function () { return waitUntil(isLoginSuccess, maxWait); })); case 10: case "end": return _context10.stop(); } } }, _callee10); }))); for (var key in options) { this.options[key] = options[key]; } }; _defineProperty(Crawler, "QUIT_REASON_CLEAR", 'quit_reason_clear'); _defineProperty(Crawler, "QUIT_REASON_PAUSE", 'quit_reason_pause'); module.exports = { Crawler: Crawler, Page: Page }; },{"./logger":10,"./retry":11}],5:[function(require,module,exports){ "use strict"; function _toConsumableArray(arr) { return _arrayWithoutHoles(arr) || _iterableToArray(arr) || _nonIterableSpread(); } function _nonIterableSpread() { throw new TypeError("Invalid attempt to spread non-iterable instance"); } function _iterableToArray(iter) { if (Symbol.iterator in Object(iter) || Object.prototype.toString.call(iter) === "[object Arguments]") return Array.from(iter); } function _arrayWithoutHoles(arr) { if (Array.isArray(arr)) { for (var i = 0, arr2 = new Array(arr.length); i < arr.length; i++) { arr2[i] = arr[i]; } return arr2; } } var log = require('./logger'); var KEY_CRAWLER_STATE = 'key_crawler_state'; function saveCrawler(crawler) { GM_setValue(KEY_CRAWLER_STATE, JSON.stringify({ isCrawling: crawler.isCrawling, currUrl: crawler.currUrl, urlList: crawler.urlList, crawledUrls: _toConsumableArray(crawler.crawledUrlSet) })); } function hasUnfinishedTask() { var state = JSON.parse(GM_getValue(KEY_CRAWLER_STATE, null)); log.debug("hasUnfinishedTask. 
state = ", state); return state && state.isCrawling; } function restoreCrawler(crawler) { var state = JSON.parse(GM_getValue(KEY_CRAWLER_STATE, null)); log.debug("restoreCrawler. state = ", state); var urlList = state.urlList, currUrl = state.currUrl, crawledUrls = state.crawledUrls; var crawledUrlSet = new Set(crawledUrls); if (currUrl) { urlList.splice(0, 0, currUrl); crawledUrlSet["delete"](currUrl); } return crawler.restoreFromSavedState(urlList, crawledUrlSet); } function clearCrawler() { GM_deleteValue(KEY_CRAWLER_STATE); } module.exports = { saveCrawler: saveCrawler, clearCrawler: clearCrawler, hasUnfinishedTask: hasUnfinishedTask, restoreCrawler: restoreCrawler }; },{"./logger":10}],6:[function(require,module,exports){ "use strict"; var log = require('./logger'); var KEY_CRAWLER_SCHEDULER = 'key_crawler_scheduler'; // === scheduler 存储和恢复 === function saveCrawlerScheduler(text) { GM_setValue(KEY_CRAWLER_SCHEDULER, text); } function clearCrawlerScheduler() { GM_deleteValue(KEY_CRAWLER_SCHEDULER); } function restoreScrawlerScheduler(startCrawlerSchedulerByText) { var text = GM_getValue(KEY_CRAWLER_SCHEDULER); log.debug("restoreScrawlerScheduler. 
text = ".concat(text, " ")); if (text) { startCrawlerSchedulerByText(text); } }
// === scheduler persistence and restore ===
module.exports = { saveCrawlerScheduler: saveCrawlerScheduler, clearCrawlerScheduler: clearCrawlerScheduler, restoreScrawlerScheduler: restoreScrawlerScheduler };
},{"./logger":10}],7:[function(require,module,exports){
"use strict";

// Bundle module 7 (required elsewhere in this bundle as './db'): the Dexie
// (IndexedDB) database that caches crawled rows, plus a helper to empty it.

// Babel helper: advances the generator one step (`key` is "next" or "throw"),
// settling the outer promise when the generator finishes or throws and
// otherwise chaining the yielded value to the next step.
function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { Promise.resolve(value).then(_next, _throw); } }

// Babel helper: wraps a generator function into a function returning a promise.
function _asyncToGenerator(fn) { return function () { var self = this, args = arguments; return new Promise(function (resolve, reject) { var gen = fn.apply(self, args); function _next(value) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value); } function _throw(err) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", err); } _next(undefined); }); }; }

var db = new Dexie('ant_log');

/**
 * Declares the database schema.
 * Version 1 defines the three log tables; version 2 repeats them and adds a
 * `headers` table indexed on `table_name`.
 * NOTE(review): the meaning of the `++` and `&` prefixes comes from Dexie's
 * schema syntax, not from this file; confirm against the Dexie docs.
 */
function initSchema() { db.version(1).stores({ 'campaigns_log': '++,推广计划ID', 'adgroups_log': '++,推广计划ID,推广单元ID', 'keywords_log': '++,推广计划ID,推广单元ID,关键词' }); db.version(2).stores({ 'campaigns_log': '++,推广计划ID', 'adgroups_log': '++,推广计划ID,推广单元ID', 'keywords_log': '++,推广计划ID,推广单元ID,关键词', 'headers': '&table_name' }); }

/**
 * Empties the three log tables one after another (campaigns, ad groups,
 * keywords). The `headers` table is not touched.
 *
 * @returns {Promise} settles after the last table has been cleared.
 */
function clear() { return _clear.apply(this, arguments); }

// Transpiled async body of clear(); replaces itself with the wrapped
// generator on first call.
function _clear() { _clear = _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee() { return regeneratorRuntime.wrap(function _callee$(_context) { while (1) { switch (_context.prev = _context.next) { case 0: _context.next = 2; return db['campaigns_log'].clear(); case 2: _context.next = 4; return db['adgroups_log'].clear(); case 4: _context.next = 6; return db['keywords_log'].clear(); case 6: case "end": return _context.stop(); } } }, _callee); })); return _clear.apply(this, arguments); }

// The schema is declared as soon as this module is first required.
initSchema();

module.exports = { db: db, clear: clear, initSchema: initSchema }; 
},{}],8:[function(require,module,exports){ "use strict"; var createUrlGetter = function createUrlGetter(cssSelector) { var filter = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : undefined; return function () { var list = []; var queryList = $(cssSelector); queryList.each(function () { if (filter && !filter(this)) return; var url = $(this).attr('href'); if (!url) return; if (url.startsWith('#')) { list.push('https://subway.simba.taobao.com/' + url); } else if (url.startsWith('https')) { list.push(url); } }); return list; }; }; function extractDataFromTable(table) { var row = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 'tr'; var cell = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 'td'; var textExtractor = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : undefined; var ret = []; $(table).find(row).each(function () { var row = []; $(this).find(cell).each(function () { textExtractor = textExtractor || function (ele) { return simplifyText($(ele).text()); }; row.push(textExtractor(this)); }); ret.push(row); }); return ret; } function simplifyText(str) { var ret = str || ''; ret = ret.replace(/[\s\r\n\t\ue000-\uffff]|()|(Ũ)/g, ''); ret = ret.trim(); return ret; } function extractDataAndSimplify(table) { var row = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 'tr'; var cell = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 'td'; return extractDataFromTable(table, row, cell); } function concat2DArray(left, right) { return left.map(function (value, index) { return value.concat(right[index]); }); } function getParameterFromUrl(url, name) { name = name.replace(/[\[]/, "\\\[").replace(/[\]]/, "\\\]"); var regexS = "[\\?&]" + name + "=([^&#]*)"; var regex = new RegExp(regexS); var results = regex.exec(url); return results == null ? 
null : results[1]; } function downloadXls(data, fileName) { var hiddenElement = document.createElement('a'); hiddenElement.href = 'data:text/csv;charset=utf-8,' + encodeURI(csv); hiddenElement.target = '_blank'; hiddenElement.download = 'people.csv'; hiddenElement.click(); } module.exports = { createUrlGetter: createUrlGetter, extractDataFromTable: extractDataFromTable, simplifyText: simplifyText, extractDataAndSimplify: extractDataAndSimplify, concat2DArray: concat2DArray, getParameterFromUrl: getParameterFromUrl }; },{}],9:[function(require,module,exports){ 'use strict'; function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { Promise.resolve(value).then(_next, _throw); } } function _asyncToGenerator(fn) { return function () { var self = this, args = arguments; return new Promise(function (resolve, reject) { var gen = fn.apply(self, args); function _next(value) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value); } function _throw(err) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", err); } _next(undefined); }); }; } var DataFrame = dfjs.DataFrame; var log = require('./logger'); var _require = require('./crawler'), Crawler = _require.Crawler; var _require2 = require('./retry'), delayDo = _require2.delayDo; var crawlerSaver = require('./crawlerSaver'); var schedulerSaver = require('./crawlerSchedulerSaver'); function initConfigPannel() { GM_config.init({ 'id': 'Taobao_Crawler_Config', 'fields': { hidden1: { section: ['操作等待时间', '每加载一个网页后,等待一些时间,防止访问过快而被淘宝官方察觉.'], type: 'hidden' }, 'minWait': { 'label': '最少等待时间(秒)', 'type': 'int', 'default': '3' }, 'maxWait': { 'label': '最长等待时间(秒)', 'type': 'int', 'default': '5' }, hidden2: { 'section': ['定时器配置', '配置方法请参考这里:<a href="https://bunkat.github.io/later/parsers.html#text" target=”_blank”>配置帮助</a>'], type: 'hidden' }, 
'scrawlScheduleText': { 'label': '抓取定时器配置', 'type': 'text', 'default': 'at 23:50 also every 1 hour between 1 and 23' }, 'downloadScheduleText': { 'label': '下载定时器配置', 'type': 'text', 'default': 'at 23:58' }, hidden3: { 'section': ['淘宝账户配置', '当登陆状态实效时,需要重新登陆。'], type: 'hidden' }, 'taobaoAccount': { 'label': '直通车账户', 'type': 'text', 'default': '' }, 'taobaoPWD': { 'label': '直通车密码', 'type': 'text', 'default': '' } } }); } var loginOptions = { loginPageURL: 'https://subway.simba.taobao.com/indexnew.jsp', // mx-view="common-home/views/pages/home/login" needLogin: function needLogin() { var mainWindow = $('div.home-body iframe').length > 0; var subWindow = $('#J_LoginBox .bd').length > 0; if (subWindow) { subWindow = $('#J_LoginBox .bd').css('display') !== 'none'; } return mainWindow || subWindow; }, isLoginPageReady: function isLoginPageReady() { return true; }, isLoginSuccess: function isLoginSuccess() { return $('#J_LoginBox .bd').css('display') === 'none'; }, doLogin: function () { var _doLogin = _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee() { return regeneratorRuntime.wrap(function _callee$(_context) { while (1) { switch (_context.prev = _context.next) { case 0: _context.next = 2; return delayDo(function () { var account = GM_config.get('taobaoAccount'); log.debug('doLogin. type account: ', account); $('#J_StaticForm input#TPL_username_1').val(account); }, 1000); case 2: _context.next = 4; return delayDo(function () { var pwd = GM_config.get('taobaoPWD'); log.debug('doLogin. type pwd: ', pwd); $('#J_StaticForm input#TPL_password_1').val(pwd); }, 1000); case 4: _context.next = 6; return delayDo(function () { log.debug('doLogin. 
submit'); $('button#J_SubmitStatic').click(); }, 2000); case 6: case "end": return _context.stop(); } } }, _callee); })); function doLogin() { return _doLogin.apply(this, arguments); } return doLogin; }() };

/**
 * Builds the option object passed to `new Crawler(...)`.
 *
 * Reads `minWait` / `maxWait` from GM_config (falling back to 3 and 5 when the
 * stored value is falsy) and multiplies them by 1000. The `onPageStart` and
 * `onCrawlComplete` hooks save and clear the state of `currRunningCrawler`
 * through crawlerSaver.
 *
 * @returns {Object} a fresh options object with new page instances.
 */
function createCrawlerOptions() { var KeywordsPage = require('./KeywordsPage'); var CampaignsPage = require('./CampaignsPage'); var AdgroupsPage = require('./AdgroupsPage'); var options = { startPageURL: 'https://subway.simba.taobao.com/#!/manage/campaign/index',
// startPageURL: 'https://subway.simba.taobao.com/#!/manage/campaign/detail?campaignId=40195486&start=2020-01-14&end=2020-01-14',
minWait: (GM_config.get('minWait') || 3) * 1000, maxWait: (GM_config.get('maxWait') || 5) * 1000,
// gotoUrl: logs the target and navigates by assigning location.href.
gotoUrl: function () { var _gotoUrl = _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee2(url) { return regeneratorRuntime.wrap(function _callee2$(_context2) { while (1) { switch (_context2.prev = _context2.next) { case 0: log.debug('gotoUrl:', url); location.href = url; case 2: case "end": return _context2.stop(); } } }, _callee2); })); function gotoUrl(_x) { return _gotoUrl.apply(this, arguments); } return gotoUrl; }(), pageList: [new CampaignsPage(), new AdgroupsPage(), new KeywordsPage()], login: loginOptions, onPageStart: function onPageStart() { crawlerSaver.saveCrawler(currRunningCrawler); }, onCrawlComplete: function onCrawlComplete() { crawlerSaver.clearCrawler(); } }; return options; }

/** Creates a crawler with the standard options (all three page types). */
function createDefaultCrawler() { var options = createCrawlerOptions(); return new Crawler(options); }

/**
 * Creates a crawler restricted to the page currently shown: only the page
 * handlers whose `triggerOnUrl` accepts the current URL are kept, their
 * `findNewUrl` flag is switched off, and `onDataFrameReady` is replaced by a
 * handler that writes the data frame to an .xls file.
 */
function createOnePageCrawler() { var options = createCrawlerOptions(); var pageList = options.pageList; var url = window.location.href; var newPageList = pageList.filter(function (p) { return p.triggerOnUrl(url); }); newPageList.forEach(function (p) { log.debug('createOnePageCrawler: id = ', p.id); p.findNewUrl = false; p.onDataFrameReady = /*#__PURE__*/ function () { var _ref = _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee3(dataFrame) 
{ var workbook, data, option, sheet, timeStr, prefix; return regeneratorRuntime.wrap(function _callee3$(_context3) { while (1) { switch (_context3.prev = _context3.next) { case 0: workbook = XLSX.utils.book_new(); data = dataFrame.toCollection(); // const header = await db['headers'].where({ 'table_name': tableName }).first(); option = undefined; // header ? { header } : undefined; sheet = XLSX.utils.json_to_sheet(data, option); XLSX.utils.book_append_sheet(workbook, sheet, p.id); timeStr = moment().format('YYYY-MM-DD_HH-mm-ss'); prefix = p.id; XLSX.writeFile(workbook, "".concat(prefix, "_").concat(timeStr, ".xls")); case 8: case "end": return _context3.stop(); } } }, _callee3); })); return function (_x2) { return _ref.apply(this, arguments); }; }(); }); options.pageList = newPageList; options.startPageURL = url; options.onPageStart = function () {}; options.onCrawlComplete = function () {}; return new Crawler(options); } function crawlCurrPage() { var crawler = createOnePageCrawler(); crawler.start(); } var GMMenus = [{ name: '启动定时抓取', fn: startCrawlerScheduler, accessKey: 'start' }, { name: '终止定时抓取', fn: stopCrawlerScheduler, accessKey: 'end' }, { name: '仅抓取一次', fn: scrawlOnce, accessKey: 'once' }, { name: '仅抓取此页', fn: crawlCurrPage, accessKey: 'curr' }, { name: '下载缓存的数据', fn: downloadData, accessKey: 'download' }, { name: '清空缓存的数据', fn: clearData, accessKey: 'clear' }, { name: '爬虫配置', fn: function fn() { return GM_config.open(); }, accessKey: 'config' }]; GMMenus.forEach(function (m) { GM_registerMenuCommand(m.name, m.fn, m.accessKey); }); function startCrawlerSchedulerByText(text) { if (crawlerScheduler) { alert("爬虫正在进行中。请勿重复启动"); return; } try { var sched = later.parse.text(text); schedulerSaver.saveCrawlerScheduler(text); crawlerScheduler = later.setInterval(scrawlOnce, sched); log.debug('startCrawlerScheduler: next 24 occurences: ', later.schedule(sched).next(24)); } catch (_unused) { alert('定时器配置错误,请重新配置'); } } function startCrawlerScheduler() { var text = 
GM_config.get('scrawlScheduleText') || 'at 23:50 also every 1 hour between 1 and 23'; startCrawlerSchedulerByText(text); }

/**
 * Menu action: stops the crawl scheduler.
 * The persisted schedule text is removed first, even when no scheduler is
 * running in this page, so it will not be restored on the next load.
 */
function stopCrawlerScheduler() { schedulerSaver.clearCrawlerScheduler(); if (!crawlerScheduler) { alert("定时器尚未启动."); return; } crawlerScheduler.clear(); crawlerScheduler = null; log.debug('stopCrawlerScheduler'); }

// Timestamp (ms) of the last accepted scrawlOnce call; used for throttling.
var lastScrawlOnceTime = 0;

/**
 * Runs one full crawl. Calls arriving less than 60 seconds after the last
 * accepted one are ignored. A crawler that is still referenced in
 * `currRunningCrawler` is cleared before the new one is created and started.
 */
function scrawlOnce() { var currTime = new Date().getTime(); if (lastScrawlOnceTime + 60 * 1000 > currTime) {
// FIXME: later.js has a bug that makes the callback fire N times in a row.
// Patched here for now; revisit once later.js is fixed.
console.warn('Scrawl too many times in a short time!'); return; } lastScrawlOnceTime = currTime; if (currRunningCrawler) { currRunningCrawler.clear(); } currRunningCrawler = createDefaultCrawler(); currRunningCrawler.start().then(function () { log.debug('scrawlOnce: crawler done.'); })["catch"](function (e) { log.debug('scrawlOnce: crawler quit with error: ', e); }); }

/**
 * Exports the cached log tables into one workbook file and optionally
 * empties the cache afterwards.
 *
 * @param {boolean} [clearAfterDownload=false] - clear the cache after writing the file.
 * @returns {Promise}
 */
function downloadData() { return _downloadData.apply(this, arguments); }

// Transpiled async body of downloadData(); replaces itself with the wrapped
// generator on first call.
function _downloadData() { _downloadData = _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee5() { var clearAfterDownload, _require3, db, clear, workbook, tables, _i, _tables, tableName, data, option, sheet, timeStr, prefix, _args5 = arguments; return regeneratorRuntime.wrap(function _callee5$(_context5) { while (1) { switch (_context5.prev = _context5.next) { case 0: clearAfterDownload = _args5.length > 0 && _args5[0] !== undefined ? 
_args5[0] : false; log.debug('downloadData: tables: ', tables); _require3 = require('./db'), db = _require3.db, clear = _require3.clear; workbook = XLSX.utils.book_new(); tables = ['campaigns_log', 'adgroups_log', 'keywords_log']; _i = 0, _tables = tables; case 6: if (!(_i < _tables.length)) { _context5.next = 17; break; } tableName = _tables[_i]; _context5.next = 10; return db[tableName].toArray(); case 10: data = _context5.sent; // const header = await db['headers'].where({ 'table_name': tableName }).first(); option = undefined; // header ? { header } : undefined; sheet = XLSX.utils.json_to_sheet(data, option); XLSX.utils.book_append_sheet(workbook, sheet, tableName); case 14: _i++; _context5.next = 6; break; case 17: timeStr = moment().format('YYYY-MM-DD_HH-mm-ss'); prefix = tables.length === 1 ? tables[0] : 'AntCrawler'; XLSX.writeFile(workbook, "".concat(prefix, "_").concat(timeStr, ".xls")); if (!clearAfterDownload) { _context5.next = 23; break; } _context5.next = 23; return clear(); case 23: case "end": return _context5.stop(); } } }, _callee5); })); return _downloadData.apply(this, arguments); } function clearData() { var DB = require('./db'); DB.clear(); } function startDownloadScheduler() { var text = GM_config.get('downloadScheduleText') || 'at 23:58'; log.debug('startDownloadScheduler: text:', text); try { var sched = later.parse.text(text); later.setInterval(function () { return downloadData(true); }, sched); log.debug('startDownloadScheduler: next 10 occurences: ', later.schedule(sched).next(10)); } catch (e) { log.error(e); alert('定时器配置错误,请重新配置'); } } var crawlerScheduler = null; var currRunningCrawler = null; window.onload = /*#__PURE__*/ _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee4() { return regeneratorRuntime.wrap(function _callee4$(_context4) { while (1) { switch (_context4.prev = _context4.next) { case 0: initConfigPannel(); later.date.localTime(); // 在 Tampermonkey 中,一个网页有多个 frame,每个 frame 都满足 userscript 
的触发条件时,会启动多个实例。 // 在 Tampermonkey 中,不同源的 iframe ,很难进行直接操作。所以,必须分开在两个环境中进行。 // top window if (!(window.top == window.self)) { _context4.next = 9; break; } log.debug('top window'); // 初始化 // 启动下载数据的调度器 startDownloadScheduler(); // 恢复调度器 schedulerSaver.restoreScrawlerScheduler(startCrawlerSchedulerByText); // 恢复之前未完成的爬取任务 if (crawlerSaver.hasUnfinishedTask()) { log.debug("restore Unfinished Crawler"); if (!loginOptions.needLogin()) { currRunningCrawler = createDefaultCrawler(); crawlerSaver.restoreCrawler(currRunningCrawler); } } _context4.next = 15; break; case 9: // inner window log.debug('inner window'); // 判断是否是登陆页面 if (!(crawlerSaver.hasUnfinishedTask() && loginOptions.needLogin())) { _context4.next = 15; break; } log.debug("login"); currRunningCrawler = createDefaultCrawler(); _context4.next = 15; return currRunningCrawler.login(); case 15: case "end": return _context4.stop(); } } }, _callee4); })); },{"./AdgroupsPage":1,"./CampaignsPage":2,"./KeywordsPage":3,"./crawler":4,"./crawlerSaver":5,"./crawlerSchedulerSaver":6,"./db":7,"./logger":10,"./retry":11}],10:[function(require,module,exports){ "use strict"; var createLog = function createLog(fn) { return function () { for (var _len = arguments.length, args = new Array(_len), _key = 0; _key < _len; _key++) { args[_key] = arguments[_key]; } return fn.apply(void 0, ["[ ==== ".concat(moment().format('YYYY-MM-DD HH:mm:ss'), " ==== ]")].concat(args)); }; }; var log = { debug: createLog(console.log), log: createLog(console.log), trace: createLog(console.trace), info: createLog(console.info), warn: createLog(console.warn), error: createLog(console.error) }; module.exports = log; },{}],11:[function(require,module,exports){ "use strict"; function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { Promise.resolve(value).then(_next, _throw); } } function 
_asyncToGenerator(fn) {
// Babel helper: wraps a generator function into a function returning a promise.
return function () { var self = this, args = arguments; return new Promise(function (resolve, reject) { var gen = fn.apply(self, args); function _next(value) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value); } function _throw(err) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", err); } _next(undefined); }); }; }

var log = require('./logger');

/**
 * Calls the promise-returning `fn` and, when it rejects, tries again after
 * `interval` ms until `count` attempts have been used.
 *
 * @param {function(): Promise} fn - operation to attempt.
 * @param {number} [count=5] - maximum number of attempts.
 * @param {number} [interval=1000] - pause in ms after a failed attempt.
 * @returns {Promise} resolves with the first successful result; rejects with
 *   the error of the last attempt. Note that the final rejection is also
 *   delivered only after the `interval` pause.
 */
function retry(_x) { return _retry.apply(this, arguments); }

// Transpiled async body of retry(); replaces itself with the wrapped
// generator on first call.
function _retry() { _retry = _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee(fn) { var count, interval, retriesLeft, _args = arguments; return regeneratorRuntime.wrap(function _callee$(_context) { while (1) { switch (_context.prev = _context.next) { case 0: count = _args.length > 1 && _args[1] !== undefined ? _args[1] : 5; interval = _args.length > 2 && _args[2] !== undefined ? _args[2] : 1000; retriesLeft = count; return _context.abrupt("return", new Promise(function (resolve, reject) { fn().then(resolve)["catch"](function (error) { setTimeout(function () { if (retriesLeft <= 1) {
// reject('maximum retries exceeded');
reject(error); } else { retry(fn, retriesLeft - 1, interval).then(resolve, reject); }
// Passing on "reject" is the important part
}, interval); }); })); case 4: case "end": return _context.stop(); } } }, _callee); })); return _retry.apply(this, arguments); }

/**
 * Polls the synchronous predicate `fn` every `interval` ms, at most `count`
 * times; the first evaluation happens after one interval.
 *
 * @param {function(): *} fn - predicate; a truthy result ends the polling.
 * @param {number} [count=5] - maximum number of evaluations.
 * @param {number} [interval=1000] - polling period in ms.
 * @returns {Promise} resolves once `fn` returns a truthy value, rejects when
 *   the attempts are used up.
 */
function check(_x2) { return _check.apply(this, arguments); }

// Transpiled async body of check(); replaces itself with the wrapped
// generator on first call.
function _check() { _check = _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee2(fn) { var count, interval, _args2 = arguments; return regeneratorRuntime.wrap(function _callee2$(_context2) { while (1) { switch (_context2.prev = _context2.next) { case 0: count = _args2.length > 1 && _args2[1] !== undefined ? _args2[1] : 5; interval = _args2.length > 2 && _args2[2] !== undefined ? 
_args2[2] : 1000; return _context2.abrupt("return", new Promise(function (resolve, reject) { var retryLeft = count; var timerID = setInterval(function () { log.debug('check: retryLeft:', retryLeft); if (fn()) { clearInterval(timerID); resolve(); return; } retryLeft--; if (retryLeft <= 0) { clearInterval(timerID); reject(); } }, interval); })); case 3: case "end": return _context2.stop(); } } }, _callee2); })); return _check.apply(this, arguments); } function waitUntil(_x3) { return _waitUntil.apply(this, arguments); } function _waitUntil() { _waitUntil = _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee3(fn) { var maxWait, interval, _args3 = arguments; return regeneratorRuntime.wrap(function _callee3$(_context3) { while (1) { switch (_context3.prev = _context3.next) { case 0: maxWait = _args3.length > 1 && _args3[1] !== undefined ? _args3[1] : 10000; interval = _args3.length > 2 && _args3[2] !== undefined ? _args3[2] : 1000; return _context3.abrupt("return", check(fn, Math.ceil(maxWait / interval), interval)); case 3: case "end": return _context3.stop(); } } }, _callee3); })); return _waitUntil.apply(this, arguments); } function delayDo(_x4) { return _delayDo.apply(this, arguments); } function _delayDo() { _delayDo = _asyncToGenerator( /*#__PURE__*/ regeneratorRuntime.mark(function _callee4(fn) { var delay, _args4 = arguments; return regeneratorRuntime.wrap(function _callee4$(_context4) { while (1) { switch (_context4.prev = _context4.next) { case 0: delay = _args4.length > 1 && _args4[1] !== undefined ? _args4[1] : 1000; return _context4.abrupt("return", new Promise(function (resolve, _) { setTimeout(function () { fn(); resolve(); }, delay); })); case 2: case "end": return _context4.stop(); } } }, _callee4); })); return _delayDo.apply(this, arguments); } module.exports = { retry: retry, check: check, waitUntil: waitUntil, delayDo: delayDo }; },{"./logger":10}]},{},[9]);