// barretlee / 通用字幕嗅探器(带语音识别兜底+语言配置)

// ==UserScript==
// @name         通用字幕嗅探器(带语音识别兜底+语言配置)
// @namespace    https://openuserjs.org/users/barretlee
// @version      0.6
// @description  自动嗅探字幕并提取到console,若无字幕则用Web Speech API兜底识别(支持语言配置)
// @author       Barret Lee <barret.china@gmail.com>
// @match        *://www.bilibili.com/*
// @match        *://www.youku.com/*
// @match        *://v.qq.com/*
// @match        *://www.youtube.com/*
// @match        *://*.iqiyi.com/*
// @match        *://www.netflix.com/*
// @match        *://www.disneyplus.com/*
// @match        *://tv.apple.com/*
// @match        *://www.primevideo.com/*
// @match        *://www.xiaoyuzhoufm.com/*
// @match        *://play.max.com/*
// @grant        GM_registerMenuCommand
// @grant        GM_setValue
// @grant        GM_getValue
// @license      MIT
// ==/UserScript==

(function () {
  'use strict';

  // Defaults used on first run and to back-fill any field missing from a
  // previously saved configuration.
  const DEFAULT_CONFIG = {
    keywords: ["subtitle", "timedtext", "aisubtitle", "srt"], // substrings matched against request URLs
    debug: true, // persisted with the config; no code in this script reads it
    lang: "zh-CN" // language tag handed to the speech-recognition fallback
  };

  // Merge the stored value over the defaults instead of replacing them
  // wholesale, so a config saved without some field still yields a complete object.
  const storedConfig = GM_getValue("subtitle_config", null);
  const config = {
    ...DEFAULT_CONFIG,
    ...(storedConfig !== null && typeof storedConfig === "object" ? storedConfig : {})
  };
  // The interceptors call config.keywords.some(...), so it must be an array.
  if (!Array.isArray(config.keywords)) {
    config.keywords = [...DEFAULT_CONFIG.keywords];
  }

  // Every subtitle cue extracted so far, in arrival order.
  const subtitles = [];
  // Set to true once any intercepted response produced at least one cue.
  let subtitleFound = false;

  // ---------- Menu configuration ----------

  /**
   * Split the user's keyword input into a clean list.
   * Accepts ASCII and full-width commas and drops empty entries: an empty
   * keyword would match every URL because "".includes("") is always true.
   * @param {string} input - Raw text typed into the prompt.
   * @returns {string[]} Trimmed, non-empty keywords.
   */
  function parseKeywords(input) {
    return input
      .split(/[,\uFF0C]/)
      .map(s => s.trim())
      .filter(s => s.length > 0);
  }

  // Menu entry: edit the URL keywords used to spot subtitle requests.
  GM_registerMenuCommand("配置字幕关键字", () => {
    const input = prompt("请输入URL中的关键字,逗号分隔", config.keywords.join(","));
    if (!input) {
      return; // cancelled or left empty
    }
    const keywords = parseKeywords(input);
    if (keywords.length === 0) {
      return; // nothing usable (only separators/whitespace): keep the current list
    }
    config.keywords = keywords;
    GM_setValue("subtitle_config", config);
    alert("配置已保存: " + config.keywords.join(", "));
  });

  // Menu entry: edit the language tag used by the speech-recognition fallback.
  GM_registerMenuCommand("配置语音识别语言", () => {
    const input = prompt("请输入语言代码 (如 zh-CN, en-US, ja-JP)", config.lang);
    const lang = input ? input.trim() : "";
    if (!lang) {
      return; // cancelled, empty, or whitespace only: keep the current language
    }
    config.lang = lang;
    GM_setValue("subtitle_config", config);
    alert("语音识别语言已保存: " + config.lang);
  });

  /**
   * Try to extract subtitle cues from a parsed JSON payload and, when any are
   * found, append them to `subtitles`, set `subtitleFound`, and log them.
   *
   * Recognised shapes:
   *   - `{ body: [{ from, to, content }] }`                        (Bilibili)
   *   - `{ events: [{ tStartMs, dDurationMs, segs: [{ utf8 }] }] }` (YouTube)
   *   - `[{ from, to, content }]`                                  (generic)
   * Any other payload (including null and primitives) is ignored.
   *
   * @param {*} data - Parsed JSON response body.
   * @param {string} url - URL the payload came from; only used for logging.
   * @returns {void}
   */
  function tryExtract(data, url) {
    // JSON.parse may legitimately yield null, a number, a string, ...
    if (data === null || typeof data !== "object") {
      return;
    }

    const isRecord = x => x !== null && typeof x === "object";
    let subs = [];

    if (Array.isArray(data.body)) { // Bilibili
      subs = data.body.filter(isRecord).map(x => ({
        from: x.from,
        to: x.to,
        content: x.content
      }));
    }
    else if (Array.isArray(data.events)) { // YouTube
      subs = data.events.filter(isRecord).flatMap(e => {
        const segs = Array.isArray(e.segs) ? e.segs : [];
        // Without a numeric duration, fall back to a zero-length cue rather than a NaN end time.
        const duration = Number.isFinite(e.dDurationMs) ? e.dDurationMs : 0;
        const from = e.tStartMs / 1000;
        const to = (e.tStartMs + duration) / 1000;
        return segs.filter(isRecord).map(s => ({ from, to, content: s.utf8 }));
      });
    }
    else if (Array.isArray(data)) { // generic
      subs = data
        .filter(x => isRecord(x) && x.content)
        .map(x => ({
          from: x.from || 0,
          to: x.to || 0,
          content: x.content
        }));
    }

    if (subs.length === 0) {
      return;
    }

    subtitleFound = true;
    // Append one by one: push(...subs) can exceed the engine's argument limit on huge payloads.
    for (const cue of subs) {
      subtitles.push(cue);
    }

    const labelStyle = "background: #0066cc; color: #fff; padding:2px 4px; border-radius:2px;";
    console.log("%c字幕来源", labelStyle);
    console.log(url);
    console.log("%c字幕新增", labelStyle);
    console.log(subs);
    console.log("%c当前字幕总数组", labelStyle);
    console.log(subtitles);
    console.log("%c字幕新增", labelStyle);
    console.log(subs.map(v => v.content).join(','));
  }

  // ---------- fetch/XHR interception ----------

  /**
   * Resolve the request URL from the first argument passed to fetch().
   * @param {*} input - A URL string, a URL object, or a Request-like object.
   * @returns {string} The URL as a string.
   */
  function resolveFetchUrl(input) {
    if (typeof input === "string") {
      return input;
    }
    // Request objects stringify to "[object Request]"; their URL lives in `.url`.
    if (input && typeof input.url === "string") {
      return input.url;
    }
    return String(input); // URL objects stringify to their href
  }

  // Wrap fetch so that responses whose URL contains a configured keyword are
  // also parsed as JSON and handed to tryExtract. The caller always receives
  // the original, untouched response.
  // NOTE(review): some userscript managers sandbox `window` when @grant is
  // used; confirm this assignment actually replaces the page's fetch.
  const origFetch = window.fetch;
  window.fetch = async function (...args) {
    const res = await origFetch.apply(this, args);
    const url = resolveFetchUrl(args[0]);
    if (config.keywords.some(k => url.includes(k))) {
      // Read a clone so the page can still consume the body. Non-JSON
      // responses are expected here and deliberately ignored.
      res.clone().json().then(d => tryExtract(d, url)).catch(() => {});
    }
    return res;
  };

  // Remember the URL passed to open() so the load listener can match it
  // against the configured keywords.
  const origOpen = XMLHttpRequest.prototype.open;
  XMLHttpRequest.prototype.open = function (...args) {
    // open() also accepts URL objects; coerce so `.includes` is always available.
    this._url = String(args[1]);
    return origOpen.apply(this, args);
  };

  // XHR instances that already carry the sniffing listener. An instance can be
  // reused for several open()/send() cycles; attaching on every send() would
  // make each later response be extracted multiple times.
  const hookedXhrs = new WeakSet();

  const origSend = XMLHttpRequest.prototype.send;
  XMLHttpRequest.prototype.send = function (...args) {
    if (!hookedXhrs.has(this)) {
      hookedXhrs.add(this);
      this.addEventListener("load", function () {
        const url = this._url || "";
        if (!config.keywords.some(k => url.includes(k))) {
          return;
        }
        try {
          // With responseType "json" the parsed value is in `response`, and
          // reading `responseText` would throw.
          const d = this.responseType === "json"
            ? this.response
            : JSON.parse(this.responseText);
          tryExtract(d, url);
        }
        catch (e) {
          // Not JSON (or body not readable as text): nothing to extract.
        }
      });
    }
    return origSend.apply(this, args);
  };

  // ---------- Web Speech API ----------

  /**
   * Start the speech-recognition fallback and print recognised text to the
   * console in batches every five seconds.
   *
   * Only finalised results are recorded. Diffing interim hypotheses against the
   * previous one re-emits the whole text whenever the recogniser revises an
   * earlier word, which produced duplicated output.
   *
   * @param {Element} media - The media element that triggered the fallback.
   *   It is not used here. NOTE(review): SpeechRecognition presumably listens
   *   to the default audio input rather than to this element — confirm.
   * @returns {void}
   */
  function startSpeechRecognition(media) {
    const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!SpeechRecognition) {
      console.warn("浏览器不支持 Web Speech API");
      return;
    }

    const recognition = new SpeechRecognition();
    recognition.continuous = true;
    recognition.interimResults = false; // only final results are consumed below
    recognition.lang = config.lang || "zh-CN"; // use the configured language

    // Finalised phrases collected since the last flush.
    let regSubs = [];

    recognition.onresult = function (event) {
      // Results before resultIndex are unchanged and were handled by an earlier event.
      for (let i = event.resultIndex; i < event.results.length; i++) {
        const result = event.results[i];
        if (!result.isFinal) {
          continue;
        }
        const text = result[0].transcript.trim();
        if (text) {
          regSubs.push(text);
        }
      }
    };

    // Print and clear the buffer; stays silent when nothing new was recognised.
    const flush = () => {
      if (regSubs.length === 0) {
        return;
      }
      console.log(regSubs.join(''));
      regSubs = [];
    };
    const flushTimer = setInterval(flush, 5E3);

    recognition.onerror = e => console.error("识别错误:", e);

    // Once recognition has ended, emit what is left and stop the periodic flush
    // instead of leaving the timer running.
    recognition.onend = () => {
      flush();
      clearInterval(flushTimer);
    };

    recognition.start();
    console.log("语音识别已启动,语言:", recognition.lang);
  }

  // ---------- Timed fallback ----------
  // Five seconds after the script starts, if no intercepted request has yielded
  // subtitles, look for a <video> (preferred) or <audio> element and start the
  // speech-recognition fallback for it.
  setTimeout(() => {
    if (subtitleFound) {
      return;
    }

    const media = document.querySelector("video") || document.querySelector("audio");
    if (!media) {
      return;
    }

    const tag = media.tagName.toLowerCase();
    console.log(
      `%c未发现字幕接口,尝试语音识别 (${tag})...`,
      "background: #0066cc; color: #fff; padding:2px 4px; border-radius:2px;"
    );
    startSpeechRecognition(media);
  }, 5000);

})();