NOTICE: By continued use of this site you understand and agree to the binding Terms of Service and Privacy Policy.
// ==UserScript==
// @name         Author.Today OpenAI TTS
// @namespace    http://tampermonkey.net/
// @version      2024-07-20
// @description  Uses OpenAI's TTS to read the books in on-site reader.
// @homepageURL  https://github.com/chamie/userScripts/blob/main/AuthorToday-OpenAI-TTS.user.js
// @author       Chamie
// @match        https://author.today/reader/*
// @icon         https://www.google.com/s2/favicons?sz=64&domain=author.today
// @grant        none
// @license      MIT
// ==/UserScript==

//Striped border style:
/*
`
border: 10px solid;
border-image-outset: 0;
border-image-repeat: stretch;
border-image-slice: 100%;
border-image-source: none;
border-image-width: 1;
border-image: repeating-linear-gradient(45deg, white,white, black, black, white 20px) 9;
`
*/

/**
 * A paragraph of the book together with its (lazily requested) narration.
 * @typedef {Object} Paragraph
 * @property {HTMLParagraphElement} element Paragraph element inside the reader.
 * @property {Promise<ArrayBuffer|null>} [audio] Pending/finished TTS request; unset until requested,
 *                                              resolves to null when the request failed.
 * @property {string} text Text that is sent to the TTS API.
 */

// TODO: store settings using GM_setValue()/GM_getValue().
// TODO: add UI for setting the OpenAI token.

(function () {
    // Settings:
    const openAIToken = "INSERT-YOUR-API-KEY-HERE";

    /** How many characters of upcoming text we try to keep converted to audio ahead of playback. */
    const BUFFER_TARGET_CHARS = 4000;

    /** @type {Paragraph[]} */
    let paragraphs = [];

    const loader = `
        <svg viewBox="0 0 10 5" xmlns="http://www.w3.org/2000/svg">
            <rect width="100%" height="10" style="fill: #dbf7ff; opacity: 0.7">
                <animate attributeName="x" values="-100%; 100%" dur="1s" repeatCount="indefinite" />
            </rect>
        </svg>
    `.replaceAll(/\n/g, "");

    const $ = selector => document.querySelector(selector);
    const $$ = selector => [...document.querySelectorAll(selector)];

    /** @type {"idle"|"playing"|"paused"} */
    let currentAction = "idle";

    let playbackRate = 1.2;

    /**
     * Incremented on every stop. An in-flight playNext() compares it with the value captured
     * when its reading run started, so a run that was stopped while waiting for audio
     * does not start playing afterwards.
     */
    let readingSession = 0;

    /** Handler currently subscribed to the player's "ended" event, or null. */
    let endedHandler = null;

    /** Object URL currently assigned to the player; kept so it can be revoked. */
    let currentAudioUrl = null;

    const nav = $("nav");
    if (!nav) {
        console.error("Author.Today OpenAI TTS: <nav> element not found, controls were not added.");
        return;
    }

    const audio = new Audio();
    audio.controls = true;
    audio.className = "tts-audio-player";
    nav.append(audio);

    // Adding component CSS styles
    const style = document.createElement("style");
    style.textContent = `
        p.beingNarrated {
            box-shadow: 2px 2px 3px black, -2px -2px 3px white;
        }
        .tts-controls-container {
            float: left;
            height: 32px;
            margin: 0 20px;
            padding: 0;
            border: none;
            display: flex;
            flex-direction: row;
            align-items: center;
            justify-content: space-around;
        }
        .tts-controls-container button {
            display: none;
            opacity: .7;
            margin: 0 5px;
        }
        .tts-controls-container.playing .pause,
        .tts-controls-container.playing .stop,
        .tts-controls-container.paused .play,
        .tts-controls-container.paused .stop,
        .tts-controls-container.idle .play {
            display: block;
        }
        .tts-audio-player {
            float: left;
            height: 32px;
            margin: 0 20px;
            display: none;
        }
        .tts-controls-container svg {
            margin-right: -64px;
            width: 64px;
            display: none;
        }
        .tts-controls-container.isLoading svg {
            display: block;
        }
        .tts-playback-rate-controls {
            display: flex;
            flex-direction: row;
        }
        .tts-playback-rate-controls span {
            padding: 5px;
        }
    `;
    document.body.append(style);

    // Adding controls
    const controlsContainer = document.createElement("div");
    controlsContainer.className = "tts-controls-container";
    controlsContainer.innerHTML = loader;
    controlsContainer.title = "Text-to-Speech controls, you can also start narration by pressing Shift+R on the keyboard";

    /**
     * Switches the UI (via a CSS class on the controls container) and the internal state.
     * @param {"idle"|"playing"|"paused"} actionName
     */
    const setAction = actionName => {
        controlsContainer.classList.remove("paused", "playing", "idle");
        controlsContainer.classList.add(actionName);
        currentAction = actionName;
    };

    /**
     * Shows or hides the loading animation.
     * @param {boolean} isLoading
     */
    const setLoading = isLoading => {
        controlsContainer.classList.toggle("isLoading", Boolean(isLoading));
    };

    /** Removes the "being narrated" highlight from every paragraph. */
    const clearHighlight = () => {
        $$("p.beingNarrated").forEach(p => p.classList.remove("beingNarrated"));
    };

    /** Revokes the object URL of the previously played audio, if any, so the blob can be freed. */
    const releaseAudioUrl = () => {
        if (currentAudioUrl) {
            URL.revokeObjectURL(currentAudioUrl);
            currentAudioUrl = null;
        }
    };

    /** Starts/resumes the player; a rejected play() promise is logged instead of left unhandled. */
    const resumePlayer = () => {
        const result = audio.play();
        if (result && typeof result.catch === "function") {
            result.catch(e => console.error(e));
        }
    };

    const actions = {
        play: () => {
            if (currentAction === "playing") {
                return;
            }
            if (currentAction === "paused") {
                setAction("playing");
                resumePlayer();
                return;
            }
            // Only report "playing" when there actually is something to narrate.
            if (startReading()) {
                setAction("playing");
            }
        },
        stop: () => {
            setAction("idle");
            // Invalidate any playNext() that is still waiting for its audio.
            readingSession++;
            if (endedHandler) {
                audio.removeEventListener("ended", endedHandler);
                endedHandler = null;
            }
            // Assigning `undefined` to `src` would make the browser request "<page>/undefined";
            // removing the attribute and calling load() resets the element instead.
            audio.pause();
            audio.removeAttribute("src");
            audio.load();
            releaseAudioUrl();
            clearHighlight();
        },
        pause: () => {
            setAction("paused");
            audio.pause();
        },
    };

    const buttons = [
        ["play", "⏵", actions.play],
        ["pause", "⏸", actions.pause],
        ["stop", "⏹", actions.stop],
    ].map(([className, text, handler]) => {
        const button = document.createElement("button");
        button.textContent = text;
        button.className = className;
        button.onclick = handler;
        return button;
    });

    controlsContainer.append(...buttons);

    const speedControls = document.createElement("div");
    speedControls.className = "tts-playback-rate-controls";
    speedControls.title = `Narration speed: ${playbackRate}`;

    const speedSlider = document.createElement("input");
    speedSlider.type = "range";
    speedSlider.min = 0.25;
    speedSlider.max = 4;
    speedSlider.step = .1;
    speedSlider.value = playbackRate;

    const speedValue = document.createElement("span");

    speedSlider.oninput = () => {
        // The slider reports a string; keep the setting numeric.
        playbackRate = Number(speedSlider.value);
        audio.playbackRate = playbackRate;
        speedControls.title = `Narration speed: ${playbackRate}`;
        speedValue.textContent = String(playbackRate);
    };

    speedControls.append(speedSlider, speedValue);
    controlsContainer.append(speedControls);

    nav.append(controlsContainer);

    /** Number of TTS requests currently in flight; the loader is shown while it is non-zero. */
    let loadingCounter = 0;

    /**
     * Converts text into speech audio using the OpenAI TTS API.
     * Never rejects: failures are logged and reported as null.
     * @param {string} text Text to convert into audio
     * @returns {Promise<ArrayBuffer|null>} ArrayBuffer with the content of mp3 audio file, or null on failure
     */
    const fetchAudio = async (text) => {
        loadingCounter++;
        setLoading(true);
        try {
            const response = await fetch("https://api.openai.com/v1/audio/speech", {
                method: "POST",
                headers: {
                    "Authorization": `Bearer ${openAIToken}`,
                    "Content-Type": "application/json",
                },
                body: JSON.stringify({
                    model: "tts-1",
                    input: text,
                    voice: "onyx",
                }),
            });
            // An error response carries a JSON body, not audio; do not hand it to the player.
            if (!response.ok) {
                throw new Error(`OpenAI TTS request failed: ${response.status} ${response.statusText}`);
            }
            return await response.arrayBuffer();
        } catch (e) {
            console.error(e);
            return null;
        } finally {
            // Runs on success and on failure, so the loader can never get stuck.
            loadingCounter--;
            console.debug({ loadingCounter });
            if (loadingCounter === 0) {
                setLoading(false);
            }
        }
    };

    /**
     * Loads the audio data into the player and (by default) starts playing it.
     * @param {ArrayBuffer} data Content of the mp3 audio file.
     * @param {boolean} [autoplay=true] When false the audio is only loaded, e.g. while paused.
     */
    const playAudio = (data, autoplay = true) => {
        const blob = new Blob([data], { type: "audio/mpeg" });
        const previousUrl = currentAudioUrl;
        currentAudioUrl = URL.createObjectURL(blob);
        audio.src = currentAudioUrl;
        if (previousUrl) {
            URL.revokeObjectURL(previousUrl);
        }
        // Must be set after `src`: loading a new source resets the rate to the default.
        audio.playbackRate = playbackRate;
        if (autoplay) {
            resumePlayer();
        }
        //We may show the player:
        //audio.style.display = "block";
    };

    /**
     * Starts the TTS request for a paragraph and remembers it on the paragraph.
     * A failed request is forgotten again so that a later run can retry it.
     * @param {Paragraph} paragraph
     */
    const requestAudio = paragraph => {
        const request = fetchAudio(paragraph.text);
        paragraph.audio = request;
        request.then(data => {
            if (data === null && paragraph.audio === request) {
                paragraph.audio = undefined;
            }
        });
    };

    /**
     * Starts narrating from the first paragraph below the top edge of the reader viewport.
     * @returns {boolean} true when narration was started, false when there is nothing to read.
     */
    const startReading = () => {
        // Re-collect the paragraphs when there is no cache yet or the cached elements
        // are no longer part of the document.
        if (!paragraphs.length || !paragraphs[0].element.isConnected) {
            paragraphs = $$("#text-container p")
                .map(p => ({
                    element: p,
                    text: p.innerText,
                }))
                // Blank paragraphs have nothing to narrate.
                .filter(p => p.text.trim().length > 0);
        }

        const reader = $("#reader");
        if (!reader) {
            console.error("Author.Today OpenAI TTS: #reader element not found.");
            return false;
        }
        const readerScrollTop = reader.scrollTop;

        let currentParagraphIdx = paragraphs.findIndex(
            paragraph => paragraph.element.offsetTop - readerScrollTop > 0
        );
        if (currentParagraphIdx < 0) {
            // No paragraph below the current scroll position (or no paragraphs at all).
            return false;
        }

        const session = readingSession;

        /** Requests audio for upcoming paragraphs until about BUFFER_TARGET_CHARS characters are covered. */
        const topUpAudioBuffer = () => {
            const upcoming = paragraphs.slice(currentParagraphIdx);
            let bufferedChars = upcoming
                .filter(p => p.audio)
                .reduce((sum, p) => sum + p.text.length, 0);
            for (const paragraph of upcoming) {
                if (bufferedChars >= BUFFER_TARGET_CHARS) {
                    break;
                }
                if (paragraph.audio) {
                    continue;
                }
                requestAudio(paragraph);
                bufferedChars += paragraph.text.length;
            }
        };

        /** Highlights, scrolls to and plays the current paragraph, then advances to the next one. */
        const playNext = async () => {
            if (session !== readingSession) {
                return;
            }
            clearHighlight();
            const paragraph = paragraphs[currentParagraphIdx];
            if (!paragraph) {
                // Past the last paragraph: narration is finished.
                actions.stop();
                return;
            }

            topUpAudioBuffer();

            paragraph.element.classList.add("beingNarrated");
            const audioData = await paragraph.audio;

            if (session !== readingSession) {
                // Narration was stopped (and possibly restarted) while the audio was loading.
                return;
            }
            if (audioData == null) {
                console.error(`No audio available for paragraph`, paragraph);
                actions.stop();
                return;
            }

            // If the user paused while the audio was loading, load it but wait for "play".
            playAudio(audioData, currentAction !== "paused");
            // This comes too high:
            //paragraph.scrollIntoView();
            reader.scrollTop = paragraph.element.offsetTop;
            currentParagraphIdx++;
        };

        // Keep exactly one "ended" subscription alive, however many times narration is restarted.
        if (endedHandler) {
            audio.removeEventListener("ended", endedHandler);
        }
        endedHandler = () => {
            playNext().catch(e => console.error(e));
        };
        audio.addEventListener("ended", endedHandler);

        topUpAudioBuffer();
        playNext().catch(e => console.error(e));
        return true;
    };

    /**
     * Tells whether a keyboard event target is an element the user types into.
     * @param {EventTarget|null} target
     * @returns {boolean}
     */
    const isTypingTarget = target =>
        target instanceof HTMLElement
        && (target.isContentEditable || /^(INPUT|TEXTAREA|SELECT)$/.test(target.tagName));

    // Shift+R toggles narration: starts/resumes it, or pauses it while playing.
    document.addEventListener("keyup", event => {
        if (event.key !== "R" || isTypingTarget(event.target)) {
            return;
        }
        if (currentAction === "playing") {
            actions.pause();
        } else {
            actions.play();
        }
    });

    // Put the controls into their initial "idle" state.
    actions.stop();
})();