Raw Source
zjh1943 / Taobao Subway Crawler

// ==UserScript==
// @name         Taobao Subway Crawler
// @version      0.2.3
// @author       zjh1943
// @description  This userscript can crawl taobao subway campaign data every one hour.
// @match        *.taobao.com/*
// @homePage     https://github.com/zjh1943/crawler-userscript
// @updateURL    https://openuserjs.org/meta/zjh1943/My_Script.meta.js
// @license      GPL-3.0-or-later; http://www.gnu.org/licenses/gpl-3.0.txt
// @copyright    2020, zjh1943
// @run-at       document-idle
// @require      https://openuserjs.org/src/libs/sizzle/GM_config.js
// @require      https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.slim.min.js
// @require      https://gmousse.github.io/dataframe-js/dist/dataframe.min.js
// @require      https://cdnjs.cloudflare.com/ajax/libs/dexie/2.0.4/dexie.min.js
// @require      https://cdnjs.cloudflare.com/ajax/libs/moment.js/2.24.0/moment.min.js
// @require      https://cdnjs.cloudflare.com/ajax/libs/xlsx/0.15.5/xlsx.full.min.js
// @require      https://unpkg.com/later2@2.0.1/later.min.js
// @grant             unsafeWindow
// @grant             GM_xmlhttpRequest
// @grant             GM_setClipboard
// @grant             GM_setValue
// @grant             GM_getValue
// @grant             GM_deleteValue
// @grant             GM_openInTab
// @grant             GM_registerMenuCommand
// @grant             GM_unregisterMenuCommand
// ==/UserScript==

(function(){function r(e,n,t){function o(i,f){if(!n[i]){if(!e[i]){var c="function"==typeof require&&require;if(!f&&c)return c(i,!0);if(u)return u(i,!0);var a=new Error("Cannot find module '"+i+"'");throw a.code="MODULE_NOT_FOUND",a}var p=n[i]={exports:{}};e[i][0].call(p.exports,function(r){var n=e[i][1][r];return o(n||r)},p,p.exports,r,e,n,t)}return n[i].exports}for(var u="function"==typeof require&&require,i=0;i<t.length;i++)o(t[i]);return o}return r})()({1:[function(require,module,exports){
"use strict";

function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { Promise.resolve(value).then(_next, _throw); } }

function _asyncToGenerator(fn) { return function () { var self = this, args = arguments; return new Promise(function (resolve, reject) { var gen = fn.apply(self, args); function _next(value) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value); } function _throw(err) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", err); } _next(undefined); }); }; }

function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } }

function _defineProperty(obj, key, value) { if (key in obj) { Object.defineProperty(obj, key, { value: value, enumerable: true, configurable: true, writable: true }); } else { obj[key] = value; } return obj; }

var DataFrame = dfjs.DataFrame;

var _require = require('./helper'),
    createUrlGetter = _require.createUrlGetter,
    extractDataAndSimplify = _require.extractDataAndSimplify,
    simplifyText = _require.simplifyText,
    extractDataFromTable = _require.extractDataFromTable,
    getParameterFromUrl = _require.getParameterFromUrl;

var anchorFilter = function anchorFilter(ele) {
  /** 暂停状态,不抓取 */
  return $(ele).closest('tr').find('td span strong:contains("暂停")').length <= 0;
};

var newUrlsGetter = createUrlGetter('a.ad-title', anchorFilter);

var AdgroupsPage = function AdgroupsPage() {
  var _this = this;

  _classCallCheck(this, AdgroupsPage);

  _defineProperty(this, "id", 'Adgroups');

  _defineProperty(this, "triggerOnUrl", function (url) {
    return !!url && !!url.match(/(https:\/\/subway.simba.taobao.com)?\/?(#\!\/manage\/campaign\/detail)(.*)/);
  });

  _defineProperty(this, "getUrlsToAdd", function () {
    return _this.findNewUrl ? newUrlsGetter() : [];
  });

  _defineProperty(this, "isPageReady", function () {
    return $('a.ad-title').length > 0 && $('#bp-scroll-table tr th').length > 0;
  });

  _defineProperty(this, "onPageReady",
  /*#__PURE__*/
  function () {
    var _ref = _asyncToGenerator(
    /*#__PURE__*/
    regeneratorRuntime.mark(function _callee(fetchSN) {
      var dataFrame;
      return regeneratorRuntime.wrap(function _callee$(_context) {
        while (1) {
          switch (_context.prev = _context.next) {
            case 0:
              dataFrame = _this.parseData(fetchSN);
              _context.next = 3;
              return _this.onDataFrameReady(dataFrame);

            case 3:
            case "end":
              return _context.stop();
          }
        }
      }, _callee);
    }));

    return function (_x) {
      return _ref.apply(this, arguments);
    };
  }());

  _defineProperty(this, "parseData", function (fetchSN) {
    var head = extractDataAndSimplify('#bp-scroll-table', 'tr', 'th');
    var columns = head[0].map(function (v) {
      if (v.startsWith('状态')) return '状态';else if (v.startsWith('营销场景')) return '营销场景';else return v;
    });

    var dataExtractor = function dataExtractor(ele) {
      var text = '';

      if ($(ele).find('.ad-title').length > 0) {
        text = $(ele).find('.ad-title').text();
      } else {
        text = $(ele).text();
      }

      return simplifyText(text);
    };

    var data = extractDataFromTable('table.bp-table[bx-name="table"]', 'tr', 'td', dataExtractor);
    var dataFrame = new DataFrame(data, columns);
    dataFrame = dataFrame.restructure(columns.filter(function (col) {
      return !!col;
    }));
    var urls = $.map($('a.ad-title'), function (value) {
      return $(value).attr('href');
    });
    var campaignIds = urls.map(function (v) {
      return getParameterFromUrl(v, 'campaignId');
    });
    dataFrame = dataFrame.withColumn('推广计划ID', function (_, index) {
      return campaignIds[index];
    });
    var adgroupIds = urls.map(function (v) {
      return getParameterFromUrl(v, 'adGroupId');
    });
    dataFrame = dataFrame.withColumn('推广单元ID', function (_, index) {
      return adgroupIds[index];
    }); // const productIds = urls.map(v => getParameterFromUrl(v, 'productId'));
    // dataFrame = dataFrame.withColumn('宝贝ID', (_, index) => productIds[index]);

    var timeStr = moment().format('YYYY-MM-DD HH:mm:ss');
    dataFrame = dataFrame.withColumn('抓取时间', function () {
      return timeStr;
    });
    dataFrame = dataFrame.withColumn('Fetch SN', function () {
      return fetchSN;
    });
    var shopName = $('span.header-nickname-inside:nth-of-type(1)').text();
    dataFrame = dataFrame.withColumn('店铺名称', function () {
      return shopName;
    });
    dataFrame.show();
    return dataFrame;
  });

  _defineProperty(this, "saveData",
  /*#__PURE__*/
  function () {
    var _ref2 = _asyncToGenerator(
    /*#__PURE__*/
    regeneratorRuntime.mark(function _callee2(dataFrame) {
      var _require2, db;

      return regeneratorRuntime.wrap(function _callee2$(_context2) {
        while (1) {
          switch (_context2.prev = _context2.next) {
            case 0:
              _require2 = require('./db'), db = _require2.db;
              _context2.next = 3;
              return db['adgroups_log'].bulkPut(dataFrame.toCollection());

            case 3:
              _context2.next = 5;
              return db['headers'].put({
                table_name: 'adgroups_log',
                'columns': dataFrame.listColumns()
              });

            case 5:
            case "end":
              return _context2.stop();
          }
        }
      }, _callee2);
    }));

    return function (_x2) {
      return _ref2.apply(this, arguments);
    };
  }());

  this.findNewUrl = true;
  this.onDataFrameReady = this.saveData;
};

;
module.exports = AdgroupsPage;

},{"./db":7,"./helper":8}],2:[function(require,module,exports){
"use strict";

function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { Promise.resolve(value).then(_next, _throw); } }

function _asyncToGenerator(fn) { return function () { var self = this, args = arguments; return new Promise(function (resolve, reject) { var gen = fn.apply(self, args); function _next(value) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value); } function _throw(err) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", err); } _next(undefined); }); }; }

function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } }

function _defineProperty(obj, key, value) { if (key in obj) { Object.defineProperty(obj, key, { value: value, enumerable: true, configurable: true, writable: true }); } else { obj[key] = value; } return obj; }

var DataFrame = dfjs.DataFrame;

var _require = require('./helper'),
    createUrlGetter = _require.createUrlGetter,
    extractDataAndSimplify = _require.extractDataAndSimplify,
    concat2DArray = _require.concat2DArray,
    getParameterFromUrl = _require.getParameterFromUrl;

var log = require('./logger');

var anchorFilter = function anchorFilter(ele) {
  /** 暂停状态,不抓取 */
  return $(ele).closest('tr').find('span.status-0').length <= 0;
};

var newUrlsGetter = createUrlGetter('.manage-common-table-container div.editor-content a', anchorFilter);

var CampaignsPage = function CampaignsPage() {
  var _this = this;

  _classCallCheck(this, CampaignsPage);

  _defineProperty(this, "id", 'Campaigns');

  _defineProperty(this, "onPageReady",
  /*#__PURE__*/
  function () {
    var _ref = _asyncToGenerator(
    /*#__PURE__*/
    regeneratorRuntime.mark(function _callee(fetchSN) {
      var dataFrame;
      return regeneratorRuntime.wrap(function _callee$(_context) {
        while (1) {
          switch (_context.prev = _context.next) {
            case 0:
              dataFrame = _this.parseData(fetchSN);
              _context.next = 3;
              return _this.onDataFrameReady(dataFrame);

            case 3:
            case "end":
              return _context.stop();
          }
        }
      }, _callee);
    }));

    return function (_x) {
      return _ref.apply(this, arguments);
    };
  }());

  _defineProperty(this, "triggerOnUrl", function (url) {
    return !!url && !!url.match(/(https:\/\/subway.simba.taobao.com)?\/?(#\!\/manage\/campaign\/index)(.*)/);
  });

  _defineProperty(this, "getUrlsToAdd", function () {
    return _this.findNewUrl ? newUrlsGetter() : [];
  });

  _defineProperty(this, "isPageReady", function () {
    var ret = $('.manage-common-table-container div.editor-content a').length > 0;
    log.debug('isPageReady:', ret);
    return ret;
  });

  _defineProperty(this, "saveData",
  /*#__PURE__*/
  function () {
    var _ref2 = _asyncToGenerator(
    /*#__PURE__*/
    regeneratorRuntime.mark(function _callee2(dataFrame) {
      var _require2, db;

      return regeneratorRuntime.wrap(function _callee2$(_context2) {
        while (1) {
          switch (_context2.prev = _context2.next) {
            case 0:
              _require2 = require('./db'), db = _require2.db;
              _context2.next = 3;
              return db['campaigns_log'].bulkPut(dataFrame.toCollection());

            case 3:
              _context2.next = 5;
              return db['headers'].put({
                table_name: 'campaigns_log',
                'columns': dataFrame.listColumns()
              });

            case 5:
            case "end":
              return _context2.stop();
          }
        }
      }, _callee2);
    }));

    return function (_x2) {
      return _ref2.apply(this, arguments);
    };
  }());

  _defineProperty(this, "parseData", function (fetchSN) {
    var leftHead = extractDataAndSimplify('table[left="true"] thead', 'tr', 'th');
    var leftData = extractDataAndSimplify('table[left="true"] tbody', 'tr[mxv]');
    var rightHead = extractDataAndSimplify('table[center="true"] thead', 'tr', 'th');
    var rightData = extractDataAndSimplify('table[center="true"] tbody', 'tr:not(.operation-tr):not(:last-of-type)', 'td');
    var columns = concat2DArray(leftHead, rightHead)[0];
    var data = concat2DArray(leftData, rightData);
    var dataFrame = new DataFrame(data, columns);
    dataFrame = dataFrame.restructure(columns.filter(function (col) {
      return !!col;
    }));
    var urls = $.map($('table[left="true"] tbody tr[mxv] .editor a'), function (value) {
      return $(value).attr('href');
    });
    var campaignIds = urls.map(function (v) {
      return getParameterFromUrl(v, 'campaignId');
    });
    dataFrame = dataFrame.withColumn('推广计划ID', function (_, index) {
      return campaignIds[index];
    });
    var timeStr = moment().format('YYYY-MM-DD HH:mm:ss');
    dataFrame = dataFrame.withColumn('抓取时间', function () {
      return timeStr;
    });
    dataFrame = dataFrame.withColumn('Fetch SN', function () {
      return fetchSN;
    });
    var shopName = $('span.header-nickname-inside:nth-of-type(1)').text();
    dataFrame = dataFrame.withColumn('店铺名称', function () {
      return shopName;
    });
    dataFrame.show();
    return dataFrame;
  });

  this.findNewUrl = true;
  this.onDataFrameReady = this.saveData;
};

module.exports = CampaignsPage;

},{"./db":7,"./helper":8,"./logger":10}],3:[function(require,module,exports){
"use strict";

function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { Promise.resolve(value).then(_next, _throw); } }

function _asyncToGenerator(fn) { return function () { var self = this, args = arguments; return new Promise(function (resolve, reject) { var gen = fn.apply(self, args); function _next(value) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value); } function _throw(err) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", err); } _next(undefined); }); }; }

function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } }

function _defineProperty(obj, key, value) { if (key in obj) { Object.defineProperty(obj, key, { value: value, enumerable: true, configurable: true, writable: true }); } else { obj[key] = value; } return obj; }

var DataFrame = dfjs.DataFrame;

var log = require('./logger');

var _require = require('./helper'),
    concat2DArray = _require.concat2DArray,
    simplifyText = _require.simplifyText,
    extractDataFromTable = _require.extractDataFromTable,
    getParameterFromUrl = _require.getParameterFromUrl;

var KeywordsPage = function KeywordsPage() {
  var _this = this;

  _classCallCheck(this, KeywordsPage);

  _defineProperty(this, "id", 'Keywords');

  _defineProperty(this, "onPageReady",
  /*#__PURE__*/
  function () {
    var _ref = _asyncToGenerator(
    /*#__PURE__*/
    regeneratorRuntime.mark(function _callee(fetchSN) {
      var dataFrame;
      return regeneratorRuntime.wrap(function _callee$(_context) {
        while (1) {
          switch (_context.prev = _context.next) {
            case 0:
              dataFrame = _this.parseData(fetchSN);
              _context.next = 3;
              return _this.onDataFrameReady(dataFrame);

            case 3:
            case "end":
              return _context.stop();
          }
        }
      }, _callee);
    }));

    return function (_x) {
      return _ref.apply(this, arguments);
    };
  }());

  _defineProperty(this, "triggerOnUrl", function (url) {
    return !!url && !!url.match(/(https:\/\/subway.simba.taobao.com)?\/?(#\!\/manage\/adgroup\/detail)(.*)/);
  });

  _defineProperty(this, "getUrlsToAdd", function () {
    return [];
  });

  _defineProperty(this, "isPageReady", function () {
    return $('.table-td .bp-table tr').length > 0;
  });

  _defineProperty(this, "saveData",
  /*#__PURE__*/
  function () {
    var _ref2 = _asyncToGenerator(
    /*#__PURE__*/
    regeneratorRuntime.mark(function _callee2(dataFrame) {
      var _require2, db;

      return regeneratorRuntime.wrap(function _callee2$(_context2) {
        while (1) {
          switch (_context2.prev = _context2.next) {
            case 0:
              _require2 = require('./db'), db = _require2.db;
              _context2.next = 3;
              return db['keywords_log'].bulkPut(dataFrame.toCollection());

            case 3:
              _context2.next = 5;
              return db['headers'].put({
                table_name: 'keywords_log',
                'columns': dataFrame.listColumns()
              });

            case 5:
            case "end":
              return _context2.stop();
          }
        }
      }, _callee2);
    }));

    return function (_x2) {
      return _ref2.apply(this, arguments);
    };
  }());

  _defineProperty(this, "parseData", function (fetchSN) {
    var REMOVE_SIGN = '%TO_REMOVE%';
    var leftColumns = ["".concat(REMOVE_SIGN, "_1"), '状态', "".concat(REMOVE_SIGN, "_2"), '关键词'];
    var leftData = extractDataFromTable('.freeze-td table.bp-table', 'tr', 'td');
    var hasSmartLibrary = false;

    if (leftData.length > 0) {
      var row = leftData[0];
      var result = row[1].match(/(.*)(流量智选词包)/);

      if (result) {
        hasSmartLibrary = true;
        row[1] = result[1];
        row[3] = result[2];
      }
    }

    var rightHead = extractDataFromTable('table.bp-table.scroll-th', 'tr', 'th'); // log.debug('rightHead:', rightHead);

    var rightColumns = rightHead[0];
    var columnsToReplace = ['质量分(PC)', '质量分(移动)', '排名(PC)', '排名(移动)', '出价(PC)', '出价(移动)'];
    rightColumns.splice.apply(rightColumns, [0, columnsToReplace.length].concat(columnsToReplace));
    var rightData = extractDataFromTable('.table-td .bp-table', 'tr', 'td');
    var columns = leftColumns.concat(rightColumns);
    var data = concat2DArray(leftData, rightData);
    var dataFrame = new DataFrame(data, columns);
    dataFrame = dataFrame.restructure(columns.filter(function (col) {
      return !col.includes(REMOVE_SIGN);
    }));
    dataFrame = dataFrame.withColumn('关键词', function (row) {
      return row.get('关键词').replace(/查看历史报表关键词全景图$/, '');
    });
    var campaignId = getParameterFromUrl(location.href, 'campaignId');
    dataFrame = dataFrame.withColumn('推广计划ID', function () {
      return campaignId;
    });
    var adgroupID = getParameterFromUrl(location.href, 'adGroupId');
    dataFrame = dataFrame.withColumn('推广单元ID', function () {
      return adgroupID;
    });
    var productUrl = $('article.box > a.imgcn80').attr('href');
    var productId = getParameterFromUrl(productUrl, 'id');
    dataFrame = dataFrame.withColumn('宝贝ID', function () {
      return productId;
    });
    var timeStr = moment().format('YYYY-MM-DD HH:mm:ss');
    dataFrame = dataFrame.withColumn('抓取时间', function () {
      return timeStr;
    });
    dataFrame = dataFrame.withColumn('Fetch SN', function () {
      return fetchSN;
    });
    var shopName = $('span.header-nickname-inside:nth-of-type(1)').text();
    dataFrame = dataFrame.withColumn('店铺名称', function () {
      return shopName;
    });
    dataFrame.show();
    return dataFrame;
  });

  this.findNewUrl = true;
  this.onDataFrameReady = this.saveData;
};

module.exports = KeywordsPage;

},{"./db":7,"./helper":8,"./logger":10}],4:[function(require,module,exports){
"use strict";

function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { Promise.resolve(value).then(_next, _throw); } }

function _asyncToGenerator(fn) { return function () { var self = this, args = arguments; return new Promise(function (resolve, reject) { var gen = fn.apply(self, args); function _next(value) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value); } function _throw(err) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", err); } _next(undefined); }); }; }

function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } }

function _defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } }

function _createClass(Constructor, protoProps, staticProps) { if (protoProps) _defineProperties(Constructor.prototype, protoProps); if (staticProps) _defineProperties(Constructor, staticProps); return Constructor; }

function _defineProperty(obj, key, value) { if (key in obj) { Object.defineProperty(obj, key, { value: value, enumerable: true, configurable: true, writable: true }); } else { obj[key] = value; } return obj; }

var _require = require('./retry'),
    check = _require.check,
    retry = _require.retry,
    waitUntil = _require.waitUntil;

var log = require('./logger');

var Page =
/*#__PURE__*/
function () {
  function Page() {
    _classCallCheck(this, Page);

    _defineProperty(this, "id", '');
  }

  _createClass(Page, [{
    key: "triggerOnUrl",
    value: function triggerOnUrl(url) {
      return false;
    }
  }, {
    key: "isPageReady",
    value: function isPageReady() {
      return false;
    }
  }, {
    key: "onPageReady",
    value: function onPageReady() {
      return true;
    }
  }, {
    key: "getUrlsToAdd",
    value: function getUrlsToAdd() {
      return [];
    }
  }]);

  return Page;
}();
/**
 * @typedef {Object} ResourceOption
 * @property {string} id
 * @property {function} triggerOnUrl 当前配置适用于哪个 URL,`(url) => boolean`
 * @property {function} isPageReady 网页是否加载完全,`(document) => boolean`
 * @property {function} onPageReady 网页已经加载成功,需要在这里处理数据,或者执行某些操作。`(document) => boolean`
 * @property {function} getUrlsToAdd 当前网页有哪些链接该加入队列 `(document) => string[]`
 * 
 * @typedef {Object} LoginOption
 * @property {string} loginPageURL
 * @property {function} needLogin 根据当前网页内容决定是否需要重新登录。`(document) => boolean`
 * @property {function} isLoginPageReady 登陆页面是否已经加载成功。`(document) => boolean`
 * @property {function} isLoginSuccess 登陆是否成功。`(document) => boolean`
 * @property {function} doLogin 执行登陆。`(document) => Promise`
 */

/**
 * @typedef {Object} CrawlerOption 
 * @property  {string} startPageURL
 * @property  {function} gotoUrl `(url) => Promise`
 * @property  {ResourceOption[]} pageList 资源列表
 * @property  {LoginOption} [login] 登陆配置,如果不需要登陆,可以不设置
 * @property  {number} [maxWait=10000] 加载网页时最长加载时间,单位:ms,
 * @property  {number} [retryCount=3] 网页加载未成功时,最多重试次数,
 * @property  {number} [operateInterval=1000] 每次操作间隔时间 
 * @property  {function} [onCrawlComplete] `() => void`
 * @property  {function} [onPageStart] `() => void`
 * @property  {function} [onPageComplete] `() => void`
 * @property  {number} [maxWait=8000] 每次加载完网页后停留时间 
 * @property  {number} [minWait=3000] 每次加载完网页后停留时间 
 */

/**
 * 爬虫调度器
 */


var Crawler =
/**
 * @type {CrawlerOption}
 */
// 每启动一次 crawler,将分配一个新的 SN,作为此次抓取的唯一标识符。
// 将要抓取的 URL
// 正在抓取的 URL

/**
 * @param {CrawlerOption} options
 */
function Crawler(options) {
  var _this = this;

  _classCallCheck(this, Crawler);

  _defineProperty(this, "options", _defineProperty({
    startPageURL: '',
    pageList: [],
    maxWait: 10000,
    retryCount: 3,
    operateInterval: 1000,
    minWait: 3000
  }, "maxWait", 8000));

  _defineProperty(this, "fetchSN", new Date().getTime());

  _defineProperty(this, "urlList", []);

  _defineProperty(this, "currUrl", null);

  _defineProperty(this, "crawledUrlSet", new Set());

  _defineProperty(this, "isCrawling", false);

  _defineProperty(this, "isPause", false);

  _defineProperty(this, "isToBeClear", false);

  _defineProperty(this, "clear", function () {
    _this.urlList = [];

    _this.crawledUrlSet.clear();

    _this.isPause = false;
    _this.isCrawling = false;
    _this.isToBeClear = true;
  });

  _defineProperty(this, "start",
  /*#__PURE__*/
  _asyncToGenerator(
  /*#__PURE__*/
  regeneratorRuntime.mark(function _callee() {
    return regeneratorRuntime.wrap(function _callee$(_context) {
      while (1) {
        switch (_context.prev = _context.next) {
          case 0:
            log.debug('_start', _this.urlList);
            _this.urlList = [_this.options.startPageURL];
            _this.isPause = false;
            _this.isToBeClear = false;
            _this.isCrawling = true;
            _this.fetchSN = new Date().getTime();
            return _context.abrupt("return", _this._start());

          case 7:
          case "end":
            return _context.stop();
        }
      }
    }, _callee);
  })));

  _defineProperty(this, "restoreFromSavedState",
  /*#__PURE__*/
  function () {
    var _ref2 = _asyncToGenerator(
    /*#__PURE__*/
    regeneratorRuntime.mark(function _callee2(urlList, crawledUrlSet) {
      return regeneratorRuntime.wrap(function _callee2$(_context2) {
        while (1) {
          switch (_context2.prev = _context2.next) {
            case 0:
              _this.urlList = urlList;
              _this.crawledUrlSet = crawledUrlSet;
              _this.isCrawling = true;
              _this.isPause = false;
              _this.isToBeClear = false;
              _this.fetchSN = new Date().getTime();
              return _context2.abrupt("return", _this._start());

            case 7:
            case "end":
              return _context2.stop();
          }
        }
      }, _callee2);
    }));

    return function (_x, _x2) {
      return _ref2.apply(this, arguments);
    };
  }());

  _defineProperty(this, "pause", function () {
    log.debug('_pause: this.urlList:', _this.urlList);
    _this.isPause = true;
  });

  _defineProperty(this, "resume",
  /*#__PURE__*/
  _asyncToGenerator(
  /*#__PURE__*/
  regeneratorRuntime.mark(function _callee3() {
    return regeneratorRuntime.wrap(function _callee3$(_context3) {
      while (1) {
        switch (_context3.prev = _context3.next) {
          case 0:
            log.debug('_resume: this.urlList:', _this.urlList);
            _this.isPause = false;
            return _context3.abrupt("return", _this._start());

          case 3:
          case "end":
            return _context3.stop();
        }
      }
    }, _callee3);
  })));

  _defineProperty(this, "_start",
  /*#__PURE__*/
  _asyncToGenerator(
  /*#__PURE__*/
  regeneratorRuntime.mark(function _callee4() {
    var _this$options, minWait, maxWait, onPageStart, onPageComplete, onCrawlComplete, timeToWait, url;

    return regeneratorRuntime.wrap(function _callee4$(_context4) {
      while (1) {
        switch (_context4.prev = _context4.next) {
          case 0:
            log.debug('_start: begin. this.urlList:', _this.urlList);
            _this$options = _this.options, minWait = _this$options.minWait, maxWait = _this$options.maxWait, onPageStart = _this$options.onPageStart, onPageComplete = _this$options.onPageComplete, onCrawlComplete = _this$options.onCrawlComplete;
            timeToWait = Math.floor(Math.random() * (maxWait - minWait)) + minWait;

            if (!(_this.urlList.length > 0)) {
              _context4.next = 11;
              break;
            }

            url = _this.urlList.splice(0, 1)[0];
            _this.currUrl = url;

            _this.crawledUrlSet.add(url);

            if (onPageStart) onPageStart(url);
            return _context4.abrupt("return", _this._crawlPage(url)["catch"](function (reason) {
              /** 如果中间失败了,还是继续下一波,不要影响下一条任务 */
              log.error('_start: crawl page fail:', url, ', reason:', reason);
            }).then(function () {
              return new Promise(function (resolve, _) {
                _this.currUrl = null;
                if (onPageComplete) onPageComplete(url);
                log.debug('_stat: wait time = ', timeToWait);
                setTimeout(resolve, timeToWait);
              });
            }).then(function () {
              if (_this.isToBeClear) {
                if (onCrawlComplete) onCrawlComplete();
                return Promise.reject(Crawler.QUIT_REASON_CLEAR);
              } else if (_this.isPause) {
                return Promise.reject(Crawler.QUIT_REASON_PAUSE);
              } else {
                return _this._start();
              }
            }));

          case 11:
            _this.isCrawling = false;
            if (onCrawlComplete) onCrawlComplete();

          case 13:
          case "end":
            return _context4.stop();
        }
      }
    }, _callee4);
  })));

  _defineProperty(this, "_openPageOnce",
  /*#__PURE__*/
  function () {
    var _ref5 = _asyncToGenerator(
    /*#__PURE__*/
    regeneratorRuntime.mark(function _callee5(url, isPageReady) {
      return regeneratorRuntime.wrap(function _callee5$(_context5) {
        while (1) {
          switch (_context5.prev = _context5.next) {
            case 0:
              return _context5.abrupt("return", _this.options.gotoUrl(url).then(function () {
                return waitUntil(isPageReady, _this.options.maxWait);
              }));

            case 1:
            case "end":
              return _context5.stop();
          }
        }
      }, _callee5);
    }));

    return function (_x3, _x4) {
      return _ref5.apply(this, arguments);
    };
  }());

  _defineProperty(this, "_runFunctionAndLoginIfNeed",
  /*#__PURE__*/
  function () {
    var _ref6 = _asyncToGenerator(
    /*#__PURE__*/
    regeneratorRuntime.mark(function _callee6(fn) {
      var _len,
          args,
          _key,
          login,
          needLogin,
          _args6 = arguments;

      return regeneratorRuntime.wrap(function _callee6$(_context6) {
        while (1) {
          switch (_context6.prev = _context6.next) {
            case 0:
              for (_len = _args6.length, args = new Array(_len > 1 ? _len - 1 : 0), _key = 1; _key < _len; _key++) {
                args[_key - 1] = _args6[_key];
              }

              login = _this.options.login;

              if (login) {
                _context6.next = 4;
                break;
              }

              return _context6.abrupt("return", fn.apply(void 0, args));

            case 4:
              needLogin = login.needLogin;
              return _context6.abrupt("return", fn.apply(void 0, args)["catch"](function () {
                if (needLogin()) {
                  return _this.login().then(function () {
                    return fn.apply(void 0, args);
                  });
                } else {
                  return Promise.reject();
                }
              }));

            case 6:
            case "end":
              return _context6.stop();
          }
        }
      }, _callee6);
    }));

    return function (_x5) {
      return _ref6.apply(this, arguments);
    };
  }());

  _defineProperty(this, "_openPageAndLoginIfNeed",
  /*#__PURE__*/
  function () {
    var _ref7 = _asyncToGenerator(
    /*#__PURE__*/
    regeneratorRuntime.mark(function _callee8(url) {
      var pageList, page, isPageReady, onPageReady, getUrlsToAdd;
      return regeneratorRuntime.wrap(function _callee8$(_context8) {
        while (1) {
          switch (_context8.prev = _context8.next) {
            case 0:
              // log.debug('_openPageAndLoginIfNeed: ', url)
              pageList = _this.options.pageList;
              page = pageList.find(function (r) {
                return r.triggerOnUrl(url);
              });
              isPageReady = page.isPageReady, onPageReady = page.onPageReady, getUrlsToAdd = page.getUrlsToAdd;
              return _context8.abrupt("return", _this._runFunctionAndLoginIfNeed(_this._openPageOnce, url, isPageReady).then(
              /*#__PURE__*/
              _asyncToGenerator(
              /*#__PURE__*/
              regeneratorRuntime.mark(function _callee7() {
                var newUrls;
                return regeneratorRuntime.wrap(function _callee7$(_context7) {
                  while (1) {
                    switch (_context7.prev = _context7.next) {
                      case 0:
                        _context7.next = 2;
                        return onPageReady(_this.fetchSN);

                      case 2:
                        newUrls = getUrlsToAdd();
                        newUrls = newUrls.filter(function (u) {
                          return !_this.crawledUrlSet.has(u);
                        });
                        log.debug('_openPageAndLoginIfNeed. newUrls:', newUrls);
                        _this.urlList = _this.urlList.concat(newUrls);

                      case 6:
                      case "end":
                        return _context7.stop();
                    }
                  }
                }, _callee7);
              }))));

            case 4:
            case "end":
              return _context8.stop();
          }
        }
      }, _callee8);
    }));

    return function (_x6) {
      return _ref7.apply(this, arguments);
    };
  }());

  _defineProperty(this, "_crawlPage",
  /*#__PURE__*/
  function () {
    var _ref9 = _asyncToGenerator(
    /*#__PURE__*/
    regeneratorRuntime.mark(function _callee9(url) {
      var retryCount;
      return regeneratorRuntime.wrap(function _callee9$(_context9) {
        while (1) {
          switch (_context9.prev = _context9.next) {
            case 0:
              log.debug('_crawlPage: ', url);
              retryCount = _this.options.retryCount;
              return _context9.abrupt("return", retry(function () {
                return _this._openPageAndLoginIfNeed(url);
              }, retryCount));

            case 3:
            case "end":
              return _context9.stop();
          }
        }
      }, _callee9);
    }));

    return function (_x7) {
      return _ref9.apply(this, arguments);
    };
  }());

  _defineProperty(this, "login",
  /*#__PURE__*/
  _asyncToGenerator(
  /*#__PURE__*/
  regeneratorRuntime.mark(function _callee10() {
    var _this$options2, login, maxWait, loginPageURL, isLoginPageReady, isLoginSuccess, doLogin, p;

    return regeneratorRuntime.wrap(function _callee10$(_context10) {
      while (1) {
        switch (_context10.prev = _context10.next) {
          case 0:
            log.debug('login');
            _this$options2 = _this.options, login = _this$options2.login, maxWait = _this$options2.maxWait;

            if (login) {
              _context10.next = 6;
              break;
            }

            return _context10.abrupt("return");

          case 6:
            loginPageURL = login.loginPageURL, isLoginPageReady = login.isLoginPageReady, isLoginSuccess = login.isLoginSuccess, doLogin = login.doLogin;
            p = null;

            if (isLoginPageReady()) {
              log.debug('login: isLoginPageReady.');
              p = doLogin();
            } else {
              log.debug('login: needReload');
              p = _this._openPageOnce(loginPageURL, isLoginPageReady).then(doLogin);
            }

            return _context10.abrupt("return", p.then(function () {
              return waitUntil(isLoginSuccess, maxWait);
            }));

          case 10:
          case "end":
            return _context10.stop();
        }
      }
    }, _callee10);
  })));

  for (var key in options) {
    this.options[key] = options[key];
  }
};

_defineProperty(Crawler, "QUIT_REASON_CLEAR", 'quit_reason_clear');

_defineProperty(Crawler, "QUIT_REASON_PAUSE", 'quit_reason_pause');

module.exports = {
  Crawler: Crawler,
  Page: Page
};

},{"./logger":10,"./retry":11}],5:[function(require,module,exports){
"use strict";

function _toConsumableArray(arr) { return _arrayWithoutHoles(arr) || _iterableToArray(arr) || _nonIterableSpread(); }

function _nonIterableSpread() { throw new TypeError("Invalid attempt to spread non-iterable instance"); }

function _iterableToArray(iter) { if (Symbol.iterator in Object(iter) || Object.prototype.toString.call(iter) === "[object Arguments]") return Array.from(iter); }

function _arrayWithoutHoles(arr) { if (Array.isArray(arr)) { for (var i = 0, arr2 = new Array(arr.length); i < arr.length; i++) { arr2[i] = arr[i]; } return arr2; } }

var log = require('./logger');

var KEY_CRAWLER_STATE = 'key_crawler_state';

function saveCrawler(crawler) {
  GM_setValue(KEY_CRAWLER_STATE, JSON.stringify({
    isCrawling: crawler.isCrawling,
    currUrl: crawler.currUrl,
    urlList: crawler.urlList,
    crawledUrls: _toConsumableArray(crawler.crawledUrlSet)
  }));
}

function hasUnfinishedTask() {
  var state = JSON.parse(GM_getValue(KEY_CRAWLER_STATE, null));
  log.debug("hasUnfinishedTask. state = ", state);
  return state && state.isCrawling;
}

function restoreCrawler(crawler) {
  var state = JSON.parse(GM_getValue(KEY_CRAWLER_STATE, null));
  log.debug("restoreCrawler. state = ", state);
  var urlList = state.urlList,
      currUrl = state.currUrl,
      crawledUrls = state.crawledUrls;
  var crawledUrlSet = new Set(crawledUrls);

  if (currUrl) {
    urlList.splice(0, 0, currUrl);
    crawledUrlSet["delete"](currUrl);
  }

  return crawler.restoreFromSavedState(urlList, crawledUrlSet);
}

function clearCrawler() {
  GM_deleteValue(KEY_CRAWLER_STATE);
}

module.exports = {
  saveCrawler: saveCrawler,
  clearCrawler: clearCrawler,
  hasUnfinishedTask: hasUnfinishedTask,
  restoreCrawler: restoreCrawler
};

},{"./logger":10}],6:[function(require,module,exports){
"use strict";

var log = require('./logger');

var KEY_CRAWLER_SCHEDULER = 'key_crawler_scheduler'; // === scheduler 存储和恢复 === 

function saveCrawlerScheduler(text) {
  GM_setValue(KEY_CRAWLER_SCHEDULER, text);
}

function clearCrawlerScheduler() {
  GM_deleteValue(KEY_CRAWLER_SCHEDULER);
}

function restoreScrawlerScheduler(startCrawlerSchedulerByText) {
  var text = GM_getValue(KEY_CRAWLER_SCHEDULER);
  log.debug("restoreScrawlerScheduler. text = ".concat(text, " "));

  if (text) {
    startCrawlerSchedulerByText(text);
  }
} // === scheduler 存储和恢复 === 


module.exports = {
  saveCrawlerScheduler: saveCrawlerScheduler,
  clearCrawlerScheduler: clearCrawlerScheduler,
  restoreScrawlerScheduler: restoreScrawlerScheduler
};

},{"./logger":10}],7:[function(require,module,exports){
"use strict";

function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { Promise.resolve(value).then(_next, _throw); } }

function _asyncToGenerator(fn) { return function () { var self = this, args = arguments; return new Promise(function (resolve, reject) { var gen = fn.apply(self, args); function _next(value) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value); } function _throw(err) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", err); } _next(undefined); }); }; }

var db = new Dexie('ant_log');

function initSchema() {
  db.version(1).stores({
    'campaigns_log': '++,推广计划ID',
    'adgroups_log': '++,推广计划ID,推广单元ID',
    'keywords_log': '++,推广计划ID,推广单元ID,关键词'
  });
  db.version(2).stores({
    'campaigns_log': '++,推广计划ID',
    'adgroups_log': '++,推广计划ID,推广单元ID',
    'keywords_log': '++,推广计划ID,推广单元ID,关键词',
    'headers': '&table_name'
  });
}

function clear() {
  return _clear.apply(this, arguments);
}

function _clear() {
  _clear = _asyncToGenerator(
  /*#__PURE__*/
  regeneratorRuntime.mark(function _callee() {
    return regeneratorRuntime.wrap(function _callee$(_context) {
      while (1) {
        switch (_context.prev = _context.next) {
          case 0:
            _context.next = 2;
            return db['campaigns_log'].clear();

          case 2:
            _context.next = 4;
            return db['adgroups_log'].clear();

          case 4:
            _context.next = 6;
            return db['keywords_log'].clear();

          case 6:
          case "end":
            return _context.stop();
        }
      }
    }, _callee);
  }));
  return _clear.apply(this, arguments);
}

initSchema();
module.exports = {
  db: db,
  clear: clear,
  initSchema: initSchema
};

},{}],8:[function(require,module,exports){
"use strict";

var createUrlGetter = function createUrlGetter(cssSelector) {
  var filter = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : undefined;
  return function () {
    var list = [];
    var queryList = $(cssSelector);
    queryList.each(function () {
      if (filter && !filter(this)) return;
      var url = $(this).attr('href');
      if (!url) return;

      if (url.startsWith('#')) {
        list.push('https://subway.simba.taobao.com/' + url);
      } else if (url.startsWith('https')) {
        list.push(url);
      }
    });
    return list;
  };
};

function extractDataFromTable(table) {
  var row = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 'tr';
  var cell = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 'td';
  var textExtractor = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : undefined;
  var ret = [];
  $(table).find(row).each(function () {
    var row = [];
    $(this).find(cell).each(function () {
      textExtractor = textExtractor || function (ele) {
        return simplifyText($(ele).text());
      };

      row.push(textExtractor(this));
    });
    ret.push(row);
  });
  return ret;
}

function simplifyText(str) {
  var ret = str || '';
  ret = ret.replace(/[\s\r\n\t\ue000-\uffff]|(󰅂)|(Ũ)/g, '');
  ret = ret.trim();
  return ret;
}

function extractDataAndSimplify(table) {
  var row = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 'tr';
  var cell = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 'td';
  return extractDataFromTable(table, row, cell);
}

function concat2DArray(left, right) {
  return left.map(function (value, index) {
    return value.concat(right[index]);
  });
}

function getParameterFromUrl(url, name) {
  name = name.replace(/[\[]/, "\\\[").replace(/[\]]/, "\\\]");
  var regexS = "[\\?&]" + name + "=([^&#]*)";
  var regex = new RegExp(regexS);
  var results = regex.exec(url);
  return results == null ? null : results[1];
}

function downloadXls(data, fileName) {
  var hiddenElement = document.createElement('a');
  hiddenElement.href = 'data:text/csv;charset=utf-8,' + encodeURI(csv);
  hiddenElement.target = '_blank';
  hiddenElement.download = 'people.csv';
  hiddenElement.click();
}

module.exports = {
  createUrlGetter: createUrlGetter,
  extractDataFromTable: extractDataFromTable,
  simplifyText: simplifyText,
  extractDataAndSimplify: extractDataAndSimplify,
  concat2DArray: concat2DArray,
  getParameterFromUrl: getParameterFromUrl
};

},{}],9:[function(require,module,exports){
'use strict';

function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { Promise.resolve(value).then(_next, _throw); } }

function _asyncToGenerator(fn) { return function () { var self = this, args = arguments; return new Promise(function (resolve, reject) { var gen = fn.apply(self, args); function _next(value) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value); } function _throw(err) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", err); } _next(undefined); }); }; }

var DataFrame = dfjs.DataFrame;

var log = require('./logger');

var _require = require('./crawler'),
    Crawler = _require.Crawler;

var _require2 = require('./retry'),
    delayDo = _require2.delayDo;

var crawlerSaver = require('./crawlerSaver');

var schedulerSaver = require('./crawlerSchedulerSaver');

function initConfigPannel() {
  GM_config.init({
    'id': 'Taobao_Crawler_Config',
    'fields': {
      hidden1: {
        section: ['操作等待时间', '每加载一个网页后,等待一些时间,防止访问过快而被淘宝官方察觉.'],
        type: 'hidden'
      },
      'minWait': {
        'label': '最少等待时间(秒)',
        'type': 'int',
        'default': '3'
      },
      'maxWait': {
        'label': '最长等待时间(秒)',
        'type': 'int',
        'default': '5'
      },
      hidden2: {
        'section': ['定时器配置', '配置方法请参考这里:<a href="https://bunkat.github.io/later/parsers.html#text" target=”_blank”>配置帮助</a>'],
        type: 'hidden'
      },
      'scrawlScheduleText': {
        'label': '抓取定时器配置',
        'type': 'text',
        'default': 'at 23:50 also every 1 hour between 1 and 23'
      },
      'downloadScheduleText': {
        'label': '下载定时器配置',
        'type': 'text',
        'default': 'at 23:58'
      },
      hidden3: {
        'section': ['淘宝账户配置', '当登陆状态实效时,需要重新登陆。'],
        type: 'hidden'
      },
      'taobaoAccount': {
        'label': '直通车账户',
        'type': 'text',
        'default': ''
      },
      'taobaoPWD': {
        'label': '直通车密码',
        'type': 'text',
        'default': ''
      }
    }
  });
}

var loginOptions = {
  loginPageURL: 'https://subway.simba.taobao.com/indexnew.jsp',
  // mx-view="common-home/views/pages/home/login"
  needLogin: function needLogin() {
    var mainWindow = $('div.home-body iframe').length > 0;
    var subWindow = $('#J_LoginBox .bd').length > 0;

    if (subWindow) {
      subWindow = $('#J_LoginBox .bd').css('display') !== 'none';
    }

    return mainWindow || subWindow;
  },
  isLoginPageReady: function isLoginPageReady() {
    return true;
  },
  isLoginSuccess: function isLoginSuccess() {
    return $('#J_LoginBox .bd').css('display') === 'none';
  },
  doLogin: function () {
    var _doLogin = _asyncToGenerator(
    /*#__PURE__*/
    regeneratorRuntime.mark(function _callee() {
      return regeneratorRuntime.wrap(function _callee$(_context) {
        while (1) {
          switch (_context.prev = _context.next) {
            case 0:
              _context.next = 2;
              return delayDo(function () {
                var account = GM_config.get('taobaoAccount');
                log.debug('doLogin. type account: ', account);
                $('#J_StaticForm input#TPL_username_1').val(account);
              }, 1000);

            case 2:
              _context.next = 4;
              return delayDo(function () {
                var pwd = GM_config.get('taobaoPWD');
                log.debug('doLogin. type pwd: ', pwd);
                $('#J_StaticForm input#TPL_password_1').val(pwd);
              }, 1000);

            case 4:
              _context.next = 6;
              return delayDo(function () {
                log.debug('doLogin. submit');
                $('button#J_SubmitStatic').click();
              }, 2000);

            case 6:
            case "end":
              return _context.stop();
          }
        }
      }, _callee);
    }));

    function doLogin() {
      return _doLogin.apply(this, arguments);
    }

    return doLogin;
  }()
};

function createCrawlerOptions() {
  var KeywordsPage = require('./KeywordsPage');

  var CampaignsPage = require('./CampaignsPage');

  var AdgroupsPage = require('./AdgroupsPage');

  var options = {
    startPageURL: 'https://subway.simba.taobao.com/#!/manage/campaign/index',
    // startPageURL: 'https://subway.simba.taobao.com/#!/manage/campaign/detail?campaignId=40195486&start=2020-01-14&end=2020-01-14',
    minWait: (GM_config.get('minWait') || 3) * 1000,
    maxWait: (GM_config.get('maxWait') || 5) * 1000,
    gotoUrl: function () {
      var _gotoUrl = _asyncToGenerator(
      /*#__PURE__*/
      regeneratorRuntime.mark(function _callee2(url) {
        return regeneratorRuntime.wrap(function _callee2$(_context2) {
          while (1) {
            switch (_context2.prev = _context2.next) {
              case 0:
                log.debug('gotoUrl:', url);
                location.href = url;

              case 2:
              case "end":
                return _context2.stop();
            }
          }
        }, _callee2);
      }));

      function gotoUrl(_x) {
        return _gotoUrl.apply(this, arguments);
      }

      return gotoUrl;
    }(),
    pageList: [new CampaignsPage(), new AdgroupsPage(), new KeywordsPage()],
    login: loginOptions,
    onPageStart: function onPageStart() {
      crawlerSaver.saveCrawler(currRunningCrawler);
    },
    onCrawlComplete: function onCrawlComplete() {
      crawlerSaver.clearCrawler();
    }
  };
  return options;
}

function createDefaultCrawler() {
  var options = createCrawlerOptions();
  return new Crawler(options);
}

function createOnePageCrawler() {
  var options = createCrawlerOptions();
  var pageList = options.pageList;
  var url = window.location.href;
  var newPageList = pageList.filter(function (p) {
    return p.triggerOnUrl(url);
  });
  newPageList.forEach(function (p) {
    log.debug('createOnePageCrawler: id = ', p.id);
    p.findNewUrl = false;

    p.onDataFrameReady =
    /*#__PURE__*/
    function () {
      var _ref = _asyncToGenerator(
      /*#__PURE__*/
      regeneratorRuntime.mark(function _callee3(dataFrame) {
        var workbook, data, option, sheet, timeStr, prefix;
        return regeneratorRuntime.wrap(function _callee3$(_context3) {
          while (1) {
            switch (_context3.prev = _context3.next) {
              case 0:
                workbook = XLSX.utils.book_new();
                data = dataFrame.toCollection(); // const header = await db['headers'].where({ 'table_name': tableName }).first();

                option = undefined; // header ? { header } : undefined;

                sheet = XLSX.utils.json_to_sheet(data, option);
                XLSX.utils.book_append_sheet(workbook, sheet, p.id);
                timeStr = moment().format('YYYY-MM-DD_HH-mm-ss');
                prefix = p.id;
                XLSX.writeFile(workbook, "".concat(prefix, "_").concat(timeStr, ".xls"));

              case 8:
              case "end":
                return _context3.stop();
            }
          }
        }, _callee3);
      }));

      return function (_x2) {
        return _ref.apply(this, arguments);
      };
    }();
  });
  options.pageList = newPageList;
  options.startPageURL = url;

  options.onPageStart = function () {};

  options.onCrawlComplete = function () {};

  return new Crawler(options);
}

function crawlCurrPage() {
  var crawler = createOnePageCrawler();
  crawler.start();
}

var GMMenus = [{
  name: '启动定时抓取',
  fn: startCrawlerScheduler,
  accessKey: 'start'
}, {
  name: '终止定时抓取',
  fn: stopCrawlerScheduler,
  accessKey: 'end'
}, {
  name: '仅抓取一次',
  fn: scrawlOnce,
  accessKey: 'once'
}, {
  name: '仅抓取此页',
  fn: crawlCurrPage,
  accessKey: 'curr'
}, {
  name: '下载缓存的数据',
  fn: downloadData,
  accessKey: 'download'
}, {
  name: '清空缓存的数据',
  fn: clearData,
  accessKey: 'clear'
}, {
  name: '爬虫配置',
  fn: function fn() {
    return GM_config.open();
  },
  accessKey: 'config'
}];
GMMenus.forEach(function (m) {
  GM_registerMenuCommand(m.name, m.fn, m.accessKey);
});

function startCrawlerSchedulerByText(text) {
  if (crawlerScheduler) {
    alert("爬虫正在进行中。请勿重复启动");
    return;
  }

  try {
    var sched = later.parse.text(text);
    schedulerSaver.saveCrawlerScheduler(text);
    crawlerScheduler = later.setInterval(scrawlOnce, sched);
    log.debug('startCrawlerScheduler: next 24 occurences: ', later.schedule(sched).next(24));
  } catch (_unused) {
    alert('定时器配置错误,请重新配置');
  }
}

function startCrawlerScheduler() {
  var text = GM_config.get('scrawlScheduleText') || 'at 23:50 also every 1 hour between 1 and 23';
  startCrawlerSchedulerByText(text);
}

function stopCrawlerScheduler() {
  schedulerSaver.clearCrawlerScheduler();

  if (!crawlerScheduler) {
    alert("定时器尚未启动.");
    return;
  }

  crawlerScheduler.clear();
  crawlerScheduler = null;
  log.debug('stopCrawlerScheduler');
}

var lastScrawlOnceTime = 0;

function scrawlOnce() {
  var currTime = new Date().getTime();

  if (lastScrawlOnceTime + 60 * 1000 > currTime) {
    //fixme: later.js 有 bug,导致回调函数被重复调用 N 次。这里先打个补丁,后面 later.js 修复后再更新。
    console.warn('Scrawl too many times in a short time!');
    return;
  }

  lastScrawlOnceTime = currTime;

  if (currRunningCrawler) {
    currRunningCrawler.clear();
  }

  currRunningCrawler = createDefaultCrawler();
  currRunningCrawler.start().then(function () {
    log.debug('scrawlOnce: crawler done.');
  })["catch"](function (e) {
    log.debug('scrawlOnce: crawler quit with error: ', e);
  });
}

function downloadData() {
  return _downloadData.apply(this, arguments);
}

function _downloadData() {
  _downloadData = _asyncToGenerator(
  /*#__PURE__*/
  regeneratorRuntime.mark(function _callee5() {
    var clearAfterDownload,
        _require3,
        db,
        clear,
        workbook,
        tables,
        _i,
        _tables,
        tableName,
        data,
        option,
        sheet,
        timeStr,
        prefix,
        _args5 = arguments;

    return regeneratorRuntime.wrap(function _callee5$(_context5) {
      while (1) {
        switch (_context5.prev = _context5.next) {
          case 0:
            clearAfterDownload = _args5.length > 0 && _args5[0] !== undefined ? _args5[0] : false;
            log.debug('downloadData: tables: ', tables);
            _require3 = require('./db'), db = _require3.db, clear = _require3.clear;
            workbook = XLSX.utils.book_new();
            tables = ['campaigns_log', 'adgroups_log', 'keywords_log'];
            _i = 0, _tables = tables;

          case 6:
            if (!(_i < _tables.length)) {
              _context5.next = 17;
              break;
            }

            tableName = _tables[_i];
            _context5.next = 10;
            return db[tableName].toArray();

          case 10:
            data = _context5.sent;
            // const header = await db['headers'].where({ 'table_name': tableName }).first();
            option = undefined; // header ? { header } : undefined;

            sheet = XLSX.utils.json_to_sheet(data, option);
            XLSX.utils.book_append_sheet(workbook, sheet, tableName);

          case 14:
            _i++;
            _context5.next = 6;
            break;

          case 17:
            timeStr = moment().format('YYYY-MM-DD_HH-mm-ss');
            prefix = tables.length === 1 ? tables[0] : 'AntCrawler';
            XLSX.writeFile(workbook, "".concat(prefix, "_").concat(timeStr, ".xls"));

            if (!clearAfterDownload) {
              _context5.next = 23;
              break;
            }

            _context5.next = 23;
            return clear();

          case 23:
          case "end":
            return _context5.stop();
        }
      }
    }, _callee5);
  }));
  return _downloadData.apply(this, arguments);
}

function clearData() {
  var DB = require('./db');

  DB.clear();
}

function startDownloadScheduler() {
  var text = GM_config.get('downloadScheduleText') || 'at 23:58';
  log.debug('startDownloadScheduler: text:', text);

  try {
    var sched = later.parse.text(text);
    later.setInterval(function () {
      return downloadData(true);
    }, sched);
    log.debug('startDownloadScheduler: next 10 occurences: ', later.schedule(sched).next(10));
  } catch (e) {
    log.error(e);
    alert('定时器配置错误,请重新配置');
  }
}

var crawlerScheduler = null;
var currRunningCrawler = null;
window.onload =
/*#__PURE__*/
_asyncToGenerator(
/*#__PURE__*/
regeneratorRuntime.mark(function _callee4() {
  return regeneratorRuntime.wrap(function _callee4$(_context4) {
    while (1) {
      switch (_context4.prev = _context4.next) {
        case 0:
          initConfigPannel();
          later.date.localTime(); // 在 Tampermonkey 中,一个网页有多个 frame,每个 frame 都满足 userscript 的触发条件时,会启动多个实例。
          // 在 Tampermonkey 中,不同源的 iframe ,很难进行直接操作。所以,必须分开在两个环境中进行。
          // top window

          if (!(window.top == window.self)) {
            _context4.next = 9;
            break;
          }

          log.debug('top window'); // 初始化
          // 启动下载数据的调度器

          startDownloadScheduler(); // 恢复调度器

          schedulerSaver.restoreScrawlerScheduler(startCrawlerSchedulerByText); // 恢复之前未完成的爬取任务

          if (crawlerSaver.hasUnfinishedTask()) {
            log.debug("restore Unfinished Crawler");

            if (!loginOptions.needLogin()) {
              currRunningCrawler = createDefaultCrawler();
              crawlerSaver.restoreCrawler(currRunningCrawler);
            }
          }

          _context4.next = 15;
          break;

        case 9:
          // inner window
          log.debug('inner window'); // 判断是否是登陆页面

          if (!(crawlerSaver.hasUnfinishedTask() && loginOptions.needLogin())) {
            _context4.next = 15;
            break;
          }

          log.debug("login");
          currRunningCrawler = createDefaultCrawler();
          _context4.next = 15;
          return currRunningCrawler.login();

        case 15:
        case "end":
          return _context4.stop();
      }
    }
  }, _callee4);
}));

},{"./AdgroupsPage":1,"./CampaignsPage":2,"./KeywordsPage":3,"./crawler":4,"./crawlerSaver":5,"./crawlerSchedulerSaver":6,"./db":7,"./logger":10,"./retry":11}],10:[function(require,module,exports){
"use strict";

var createLog = function createLog(fn) {
  return function () {
    for (var _len = arguments.length, args = new Array(_len), _key = 0; _key < _len; _key++) {
      args[_key] = arguments[_key];
    }

    return fn.apply(void 0, ["[ ==== ".concat(moment().format('YYYY-MM-DD HH:mm:ss'), " ==== ]")].concat(args));
  };
};

var log = {
  debug: createLog(console.log),
  log: createLog(console.log),
  trace: createLog(console.trace),
  info: createLog(console.info),
  warn: createLog(console.warn),
  error: createLog(console.error)
};
module.exports = log;

},{}],11:[function(require,module,exports){
"use strict";

function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { Promise.resolve(value).then(_next, _throw); } }

function _asyncToGenerator(fn) { return function () { var self = this, args = arguments; return new Promise(function (resolve, reject) { var gen = fn.apply(self, args); function _next(value) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value); } function _throw(err) { asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", err); } _next(undefined); }); }; }

var log = require('./logger');

function retry(_x) {
  return _retry.apply(this, arguments);
}

function _retry() {
  _retry = _asyncToGenerator(
  /*#__PURE__*/
  regeneratorRuntime.mark(function _callee(fn) {
    var count,
        interval,
        retriesLeft,
        _args = arguments;
    return regeneratorRuntime.wrap(function _callee$(_context) {
      while (1) {
        switch (_context.prev = _context.next) {
          case 0:
            count = _args.length > 1 && _args[1] !== undefined ? _args[1] : 5;
            interval = _args.length > 2 && _args[2] !== undefined ? _args[2] : 1000;
            retriesLeft = count;
            return _context.abrupt("return", new Promise(function (resolve, reject) {
              fn().then(resolve)["catch"](function (error) {
                setTimeout(function () {
                  if (retriesLeft <= 1) {
                    // reject('maximum retries exceeded');
                    reject(error);
                  } else {
                    retry(fn, retriesLeft - 1, interval).then(resolve, reject);
                  } // Passing on "reject" is the important part

                }, interval);
              });
            }));

          case 4:
          case "end":
            return _context.stop();
        }
      }
    }, _callee);
  }));
  return _retry.apply(this, arguments);
}

function check(_x2) {
  return _check.apply(this, arguments);
}

function _check() {
  _check = _asyncToGenerator(
  /*#__PURE__*/
  regeneratorRuntime.mark(function _callee2(fn) {
    var count,
        interval,
        _args2 = arguments;
    return regeneratorRuntime.wrap(function _callee2$(_context2) {
      while (1) {
        switch (_context2.prev = _context2.next) {
          case 0:
            count = _args2.length > 1 && _args2[1] !== undefined ? _args2[1] : 5;
            interval = _args2.length > 2 && _args2[2] !== undefined ? _args2[2] : 1000;
            return _context2.abrupt("return", new Promise(function (resolve, reject) {
              var retryLeft = count;
              var timerID = setInterval(function () {
                log.debug('check: retryLeft:', retryLeft);

                if (fn()) {
                  clearInterval(timerID);
                  resolve();
                  return;
                }

                retryLeft--;

                if (retryLeft <= 0) {
                  clearInterval(timerID);
                  reject();
                }
              }, interval);
            }));

          case 3:
          case "end":
            return _context2.stop();
        }
      }
    }, _callee2);
  }));
  return _check.apply(this, arguments);
}

function waitUntil(_x3) {
  return _waitUntil.apply(this, arguments);
}

function _waitUntil() {
  _waitUntil = _asyncToGenerator(
  /*#__PURE__*/
  regeneratorRuntime.mark(function _callee3(fn) {
    var maxWait,
        interval,
        _args3 = arguments;
    return regeneratorRuntime.wrap(function _callee3$(_context3) {
      while (1) {
        switch (_context3.prev = _context3.next) {
          case 0:
            maxWait = _args3.length > 1 && _args3[1] !== undefined ? _args3[1] : 10000;
            interval = _args3.length > 2 && _args3[2] !== undefined ? _args3[2] : 1000;
            return _context3.abrupt("return", check(fn, Math.ceil(maxWait / interval), interval));

          case 3:
          case "end":
            return _context3.stop();
        }
      }
    }, _callee3);
  }));
  return _waitUntil.apply(this, arguments);
}

function delayDo(_x4) {
  return _delayDo.apply(this, arguments);
}

function _delayDo() {
  _delayDo = _asyncToGenerator(
  /*#__PURE__*/
  regeneratorRuntime.mark(function _callee4(fn) {
    var delay,
        _args4 = arguments;
    return regeneratorRuntime.wrap(function _callee4$(_context4) {
      while (1) {
        switch (_context4.prev = _context4.next) {
          case 0:
            delay = _args4.length > 1 && _args4[1] !== undefined ? _args4[1] : 1000;
            return _context4.abrupt("return", new Promise(function (resolve, _) {
              setTimeout(function () {
                fn();
                resolve();
              }, delay);
            }));

          case 2:
          case "end":
            return _context4.stop();
        }
      }
    }, _callee4);
  }));
  return _delayDo.apply(this, arguments);
}

module.exports = {
  retry: retry,
  check: check,
  waitUntil: waitUntil,
  delayDo: delayDo
};

},{"./logger":10}]},{},[9]);