NOTICE: By continued use of this site you understand and agree to the binding Terms of Service and Privacy Policy.
// ==UserScript== // @name Bangumi-To Romaji Title // @name:zh-CN 班固米-获取条目罗马字标题 // @version 0.3.5 // @description Retrieve the Romaji title of the subject and display it in the infobox // @author weiduhuo // @namespace https://github.com/weiduhuo/scripts // @match *://bgm.tv/subject/* // @match *://bangumi.tv/subject/* // @match *://chii.in/subject/* // @grant none // @license MIT // @description:zh-CN 基于MyAnimeList的非官方API Jikan,获取条目的罗马字标题,并呈现于infobox // ==/UserScript== (function () { 'use strict'; const SCRIPT_NAME = '班固米-罗马名获取组件'; /** 单次查询返回的结果数量上限 */ const QueryLimit = 10; /** 相关度的最低采用阈值 (含自身) */ const minRelThr = 2.5; /** 相关度的触发再尝阈值 (不含自身) */ const retryRelThr = 7.5; /** 枚举启用状态 */ const EnableState = { /** 全部 */ ALL_ENABLED: 'allEnabled', /** 仅中日 */ ONLY_CJ: 'onlyChinese&Japanese', /** 仅日文 */ ONLY_JAPANESE: 'onlyJapanese', }; /** 启用状态 */ let enableState = EnableState.ONLY_CJ; /** 所支持的条目类型 */ const SubjectType = ['anime']; /** 地区待选标签 */ const RegionTags = [ ['中国', '国产'], ['日本', '日本动画'], ]; const Region = { cn: 1, jp: 2, parse(value) { for (const [k, v] of Object.entries(this)) if (value === v) return k; return 'null'; } }; /** 媒体类型映射 BGM to MAL */ const PlatformMap = { 'anime': { 'TV': 'tv', // ['tv','tv_special'] 将通过 subTags 尝试区分 '剧场版': 'movie', 'OVA': 'ova', 'WEB': '', // ['ona','music','special','cm','pv',...] 一对多,但 Jikan API 不支持多参数,因此空缺转而全范围搜索 // 'music' 将通过 subTags 尝试区分 '动态漫画': '', }, }; /** 匹配假名 */ const KanaRe = /[\p{sc=Hiragana}\p{sc=Katakana}]/u; /** 匹配汉字与假名 */ const ZnJpRe = /[\p{sc=Hiragana}\p{sc=Katakana}\u30FC\u31F0-\u31FF\uFF61-\uFF9F\p{sc=Han}]/u; /** 匹配仅包含拉丁字母与符号 */ // const OnlyLatinRe = /^[\s\u0020-\u00FF\u2000-\u206F\u2150-\u218F\u25A0-\u26FF\u3000-\u301E\uFE30-\uFF65\uFFE0-\uFFEF]+$/; // 上述分别匹配了空白字符、基本拉丁字母及补充、常用标点符号、数字形式、几何图形及杂项符号、CJK常见标点符号、 CJK兼容符号 /** 匹配标题前缀 */ const PrefTitleRe = /^(((劇場版?)?総集|短)編|(映画|劇場|同人)版?)\s*|.?(Official )?Music Video.?/i; /** 匹配标题后缀 */ const SuffTitleRe = /\s[^\s]*(版)$/; // const SuffTitleRe = /\s[^\s]*([\d\u2150-\u218F][^\s]*|版)$/; /** 匹配标题短语 */ const PhrasesRe = /(((劇場版?)?総集|短)編|(映画|劇場|同人)版?)|(\d+)|([a-z]{2,}|[之的]|(?:(?![之的])[\p{sc=Han}])+|[\p{sc=Hiragana}\u30FC]+|[\p{sc=Katakana}\u30FC\u31F0-\u31FF\uFF61-\uFF9F]+)/ug; // 注意 \p{scx=Han} 会匹配 '『』'符号 /** 匹配标题短语分层过滤 */ const PhrasesFilterRe = /^(映画|アニメ|第|st|nd|rd|th|season|章)$/; /** 匹配标题内符号 */ const PunctRe = /[\u2000-\u206F\u25A0-\u26FF\u3000-\u301E\uFE30-\uFF65\uFFE0-\uFFEF]/g; async function main() { const subType = getSubjectType(); if (!SubjectType.includes(subType)) return; // 基于条目地区,判断是否启用功能 const infobox = document.querySelector('#infobox'); const rawTitle = getSubjectTitle(); const isLatinTitle = !ZnJpRe.test(rawTitle); // 判断标题是否仅包含拉丁字母 const subTags = getSubjectTags(); let region = includeTargetTag(subTags, ...RegionTags); if (!region) { // 通过标题与角色名进行兜底 (公共标签未完全覆盖) if (KanaRe.test(rawTitle) || charNameHasKana() || KanaRe.test(getSubjectSummary())) { region = Region.jp; } else { if (enableState === EnableState.ALL_ENABLED) addTitle(infobox, region, isLatinTitle, rawTitle); return; }} if (region === Region.cn && enableState === EnableState.ONLY_JAPANESE) return; // 添加待定的名称 const titleLis = addTitle(infobox, region, isLatinTitle); // 尝试先通过 sessionStorage 获取已存储的数据 const [, id] = getSubjectId(); const key = `subtitle-${id}`; let data = sessionStorage.getItem(key); // data = null; if (data) { data = JSON.parse(data); updateTitle(titleLis, [data.romaji, data.english]); console.log(`${SCRIPT_NAME}:`, { 'relScore': data.relScore, 'romaji': data.romaji, 'english': data.english, }); if(data.url) console.log(`${SCRIPT_NAME}:`, data.url); // url单独打印,以可直接点击 return; } // 初步解析网页数据 (用于API查询的数据优先) let platform = getPlatform(subType); if (includeTargetTag(subTags, ['MV'])) { platform = 'music'; } if (platform === 'tv' && includeTargetTag(subTags, ['OVA', 'SP', 'TVSP'], ['MV']) === 1) { platform = 'tv_special'; } const tips = infobox.querySelectorAll('span.tip'); const startDate = getStartDate(infobox, tips); let episodes, notFirstPart; // 延后解析 // 尝试获取名称 let subs, relScore, titles, url, mainTitle, phraseSet; mainTitle = rawTitle.replace(PrefTitleRe, ''); // 修复 Jikan API 首字符匹配权重过大的问题 mainTitle = mainTitle.replace(PunctRe, ' '); // 修复 Jikan API 对诸如「」等符号匹配权重过大的问题 const queryStartDate = startDate ? `${startDate.year - 1}-01-01` : ''; // 保守起见,仅精确到年份,并回退一年 await handlerQuery(platform, queryStartDate); const data_1 = packData(); if (relScore >= minRelThr ) { updateTitle(titleLis, titles); sessionStorage.setItem(key, JSON.stringify(data_1)); } if (relScore >= retryRelThr) return; // 相关度较低,扩大搜索范围 console.log(`${SCRIPT_NAME}:相关度较低,扩大搜索范围,再次尝试`); const preCl = relScore; mainTitle = mainTitle.replace(SuffTitleRe, ''); // 删除如 'シーズン2' 的后缀,只保留主标题 await handlerQuery(); // 由于搜索的平台范围扩大,降低相关度得分 if (platform && relScore) relScore -= 0.5; const data_2 = packData(); if (relScore >= minRelThr && relScore > preCl) { updateTitle(titleLis, titles); sessionStorage.setItem(key, JSON.stringify(data_2)); } else if (relScore < minRelThr && preCl < minRelThr ) { updateTitle(titleLis, ['NULL', 'NULL']); } function packData() { const data = { 'relScore': relScore / 10, 'romaji': titles[0], 'english': titles[1], }; console.log(`${SCRIPT_NAME}:`, data); if (url) console.log(`${SCRIPT_NAME}:`, url); data.url = url; return data; } /** 执行一次查询 */ async function handlerQuery(_platform = '', _startDate ='') { const promise = querySubject(mainTitle, subType, _platform, _startDate); // 同步解析网页数据 (减少忙等API) episodes ??= getEpisodes(infobox, tips); // 判断首集序号是否为开头,防止 BGM 与 MyAnimeList 条目合并不同 notFirstPart ??= !isFirstPart(); phraseSet = getPhraseSet(mainTitle); if (!titles) { console.log(`${SCRIPT_NAME}:`, { 'region': Region.parse(region), 'platform': platform, 'episodes': episodes, 'startDate': startDate, 'phraseSet': phraseSet, }); } subs = await promise; if (Array.isArray(subs)) { [relScore, titles, url] = searchSubject(subs, phraseSet, isLatinTitle, startDate, episodes); } else { [relScore, titles, url] = [0, [subs, subs], null]; } // 对于非首Part的条目的开播时间参考的相关度降低 if (notFirstPart) relScore *= 0.75; titles = titles.map((title) => title.replace(/\s\((TV|OVA)\)/, '')); // 删除后缀 } } /** * 通过条目原标题获取相关条目数据集 * @param {string} title 原标题 * @param {string} subType 条目类型 * @param {string} platform 媒体类型 * @param {string} startDate 起始日期 'Y-m-d' * @param {number} limit 指定返回的结果数量 * @returns {Promise<string | Array<Object>>} 条目数据集 */ async function querySubject(title, subType, platform, startDate, limit = QueryLimit) { const url = new URL(`https://api.jikan.moe/v4/${subType}`); url.searchParams.set('limit', limit); url.searchParams.set('q', title); if (platform) url.searchParams.set('type', platform); if (startDate) url.searchParams.set('start_date', startDate); try { console.time(`Jikan API`); const response = await fetch(url); console.timeEnd(`Jikan API`); const data = await response.json(); const subs = data.data; if (!subs || subs.length === 0) return 'NULL'; else return subs; } catch (error) { console.error('Jikan API请求失败:', error); return 'ERROR'; } } /** * @param {Array<Object>} subs * @param {Set<string>} phraseSet * @param {boolean} isLatinTitle * @param {{year: number, month: number, day: number} | null} startDate 开播时间 * @param {number | null} episodes 集数 * @returns {[number, [Object], string]} [`relScore`, [`romaji`, `english`], `url`] * - `relScore`相关度 - 10分值,6分为原标题短语的匹配度,4分为开播时间与集数的匹配度 */ function searchSubject(subs, phraseSet, isLatinTitle, startDate, episodes) { const tmpSubs = []; console.groupCollapsed(`${SCRIPT_NAME}:详情`); subs.forEach((sub, index) => { let similarity; // 计算 jaccard 相似度 if (!isLatinTitle) { similarity = jaccardSimilarity(phraseSet, sub.title_japanese); } else { // 当搜索词全为拉丁字母时,同时考虑罗马音标题与英文标题 similarity = Math.max( jaccardSimilarity(phraseSet, sub.title), jaccardSimilarity(phraseSet, sub.title_english) ); } sub.relScore = similarity * 6; // 旧方法难以区分 '日常 Eテレ版' 2012-1 在 ['日常' 2011-4, '男子高校生の日常' 2012-1 ] /* const title = (isLatinTitle ? sub.title : sub.title_japanese).toLowerCase(); const simScore = phraseSet.keys().reduce((acc, val) => acc + title.includes(val), 0); sub.relScore = simScore * 6 / phraseSet.size; */ if (sub.relScore) tmpSubs.push(sub); sub.index = index; console.log({ 'index': index, 'simScore': sub.relScore, 'type': sub.type, 'startDate': sub.aired.from ? sub.aired.from.split('T')[0] : null, 'episodes': sub.episodes, 'japanese': sub.title_japanese, 'romanji': sub.title, 'english': sub.title_english, 'url': sub.url, }); }); if (tmpSubs.length) { subs = tmpSubs; subs.sort((a, b) => b.relScore - a.relScore); } if (!startDate) { return getResult(1, subs); } const sameYearSubs = subs.filter(sub => sub.aired.prop.from.year === startDate.year); if (sameYearSubs.length === 0) { return getResult(0, subs); } const sameMonthSubs = sameYearSubs.filter(sub => startDate.month && sub.aired.prop.from.month === startDate.month); if (sameMonthSubs.length === 0) { return getResult(2, sameYearSubs); } if (sameMonthSubs.length === 1) { return getResult(4, sameMonthSubs); } if (!episodes) { return getResult(3, sameMonthSubs); } // 开播时间相同的有多个,寻找集数差异最小的 let minDiff = Infinity, index = 0; sameMonthSubs.forEach((sub, _index) => { const diff = Math.abs(episodes - sub.episodes); if (diff < minDiff) { minDiff = diff; index = _index; } }); sameMonthSubs[index].relScore += 2; return getResult(2, sameMonthSubs); /** 计算最终相似度,并获取结果 */ function getResult(offset, _subs) { _subs.forEach(sub => { sub.relScore += offset }); subs.sort((a, b) => b.relScore - a.relScore); // console.groupEnd(); console.log('sortedByRelScore:', subs.map(sub => [sub.index, sub.relScore])); const sub = subs[0]; console.log('result:', sub.index); console.groupEnd(); // console.log(sub); sub.title_english ??= 'NULL'; return [sub.relScore, [sub.title, sub.title_english], sub.url]; } } function jaccardSimilarity(set1, str2) { const set2 = getPhraseSet(str2); const intersection = new Set([...set1].filter(x => set2.has(x))); const union = new Set([...set1, ...set2]); return intersection.size / union.size; } function getPhraseSet(title) { if (!title) return new Set(); const phrases = title.toLowerCase().match(PhrasesRe); if (!phrases) return new Set(); return new Set(phrases .filter((s) => !PhrasesFilterRe.test(s)) ); } function getSubjectType() { return document.querySelector('#navMenuNeue .focus').getAttribute('href').split('/')[1]; } function getSubjectTitle() { return document.querySelector('#headerSubject > h1 > a').textContent.trim(); } function getSubjectTags() { return document.querySelectorAll('.subject_tag_section > .inner span'); } function getSubjectSummary() { return document.querySelector('#subject_summary').textContent; } function getSubjectId() { const urlPattern = /^\/(.+)\/(\d+)$/; const match = window.location.pathname.match(urlPattern); if (!match) return [null, null]; const [, patternType, subId] = match; return [patternType, subId]; } /** * @param {NodeListOf<Element>} subTags * @param {...Array<string>} targetTypeTags 目标种类的标签 * @returns {number} 种类编号由1开始,0表不存在 */ function includeTargetTag(subTags, ...targetTypeTags) { for (const tag of subTags) { const _tag = tag.textContent.trim(); for (const [type, targetTags] of targetTypeTags.entries()) { if (targetTags.includes(_tag)) return type + 1; } } return 0; } function charNameHasKana() { const chars = document.querySelectorAll('#browserItemList strong'); for (const char of chars) { if (KanaRe.test(char.innerText)) return true; } return false; } function getPlatform(subType) { const smallTag = document.querySelector('#headerSubject > h1 > small.grey'); if (smallTag) { const platform = smallTag.innerText.trim(); return platform in PlatformMap[subType] ? PlatformMap[subType][platform] : ''; } else return ''; } function isFirstPart() { const firstEp = document.querySelector('#subject_detail > .subject_prg > .prg_list > li:first-child'); if (firstEp) { return ['00', '01'].includes(firstEp.innerText.trim()); } else return true; } /** * @param {HTMLElement} infobox * @param {NodeListOf<HTMLElement>} tips * @returns {number | null} */ function getEpisodes(infobox, tips) { const limit = 10; let ep = null; for (const [i, tip] of tips.entries()) { if (i > limit) return null; if (tip.innerText.trim() === '话数:') { ep = tip; break; } } if (!ep) return null; while (ep.parentElement !== infobox) { ep = ep.parentElement; } const match = ep.textContent.match(/(\d+)/); if (match) return +match[1]; else return null; } /** * @param {HTMLElement} infobox * @param {NodeListOf<HTMLElement>} tips * @param {number} region * @returns {{year: number, month: number, day: number} | null} */ function getStartDate(infobox, tips, region) { const regex = /(开始|(?:放送|播出)(?:开始|日期))|([上公]映(?!许可))|(发售)/; // 优先级 1 > 2 > 3 其大部分时是准确的 let date = null; let preIndex = 10, index; for (const tip of tips) { const match = tip.innerText.match(regex); if (match) { for (const [i, m] of match.slice(1, 4).entries()) if (m) { index = i + 1; break; } if (index < preIndex) { date = tip; // 仅优先级更高的才可覆盖 preIndex = index; } if (index === 1) break; } } if (!date) return null; while (date.parentElement !== infobox) { date = date.parentElement; } const dateText = date.textContent; let match; if (region === Region.jp) { // 优先匹配日本时间 match = dateText.match(/日本[^))]*(\d{4})[-/年]?(\d{1,2})?[-/月]?(\d{1,2})?[-/日]?/); match ??= dateText.match(/(\d{4})[-/年]?(\d{1,2})?[-/月]?(\d{1,2})?[-/日]?[\s((]+日本/); } match ??= dateText.match(/(\d{4})[-/年]?(\d{1,2})?[-/月]?(\d{1,2})?[-/日]?/); if (match) { return { year: +match[1], month: +match[2], day: +match[3] }; } else return null; } /** * @param {HTMLElement} infobox * @param {number} region * @param {boolean} isLatinTitle * @param {string} [title='···'] * @returns {[HTMLElement]} */ function addTitle(infobox, region, isLatinTitle, title = '···') { const romajiLi = document.createElement('li'); let romajiTip, englishLi = null; if (!region) { romajiTip = '索引名'; } else { if (isLatinTitle) { romajiTip = '索引名'; } else if (region === Region.jp) { romajiTip = '罗马名'; } else if (region === Region.cn) { romajiTip = '拼音名'; } englishLi = document.createElement('li'); englishLi.className = 'folded'; englishLi.innerHTML = `<span class="tip" style="user-select: none">英文名: </span>${title}`; } romajiLi.innerHTML = `<span class="tip" style="user-select: none">${romajiTip}: </span>${title}`; const firstLi = infobox.children[0]; const tip = firstLi.querySelector('span.tip'); const ref = tip && tip.innerText.trim() === '中文名:' ? firstLi.nextSibling : firstLi; infobox.insertBefore(romajiLi, ref); if (region) { infobox.insertBefore(englishLi, ref); return [romajiLi, englishLi]; } else { return [romajiLi]; } } /** * @param {[HTMLElement]} lis * @param {[string]} titles */ function updateTitle(lis, titles) { lis.forEach((li, index) => { li.childNodes[1].textContent = titles[index]; }) } main(); })();