From 8b6a27ae42c1880d143578cdf8f40e2b51e3ee0f Mon Sep 17 00:00:00 2001
From: Fox Cunning
Date: Wed, 1 Jan 2025 18:48:17 +0000
Subject: [PATCH] Full rewrite of TTS system

---
 mikupad.html | 1290 ++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 1037 insertions(+), 253 deletions(-)

diff --git a/mikupad.html b/mikupad.html
index e374ba6..ff602a6 100644
--- a/mikupad.html
+++ b/mikupad.html
@@ -318,6 +318,32 @@
 	outline: 1px solid var(--color-base-50);
 	outline-offset: 1px;
 }
+#prompt-container #prompt-overlay > .preview {
+	color: var(--color-dark);
+	opacity: 40%;
+}
+html.serif-dark #prompt-container #prompt-overlay > .preview,
+html.monospace-dark #prompt-container #prompt-overlay > .preview,
+html.nockoffAI #prompt-container #prompt-overlay > .preview {
+	color: var(--color-light);
+	opacity: 30%;
+}
+#prompt-container #prompt-overlay > .nudge {
+	outline: 1px solid var(--color-dark);
+	margin-left: 5px;
+	padding-bottom: 2px;
+	padding: 0 4px;
+	font-size: calc(11px * var(--font-size-multiplier));
+	background-color: transparent;
+	border-radius: 3px;
+	position: relative;
+	top: -2px;
+}
+html.serif-dark #prompt-container #prompt-overlay > .nudge,
+html.monospace-dark #prompt-container #prompt-overlay > .nudge,
+html.nockoffAI #prompt-container #prompt-overlay > .nudge {
+	border: 1px solid var(--color-light);
+}
 
 #probs {
 	position: absolute;
@@ -999,6 +1025,7 @@
 	margin-left: auto;
 	margin-right: auto;
 	padding: 2px 10px;
+	max-width: 215px;
 }
 
 button {
@@ -1419,36 +1446,7 @@
 const API_LLAMA_CPP = 0;
 const API_KOBOLD_CPP = 2;
 const API_OPENAI_COMPAT = 3;
-
-window.TTS = {
-	prev_prompt: "",
-	new_prompt: "",
-	new_text: "",
-	voice: null,
-	voice_id: 0,
-	voices: [],
-	rate: 1,
-	pitch: 1,
-	volume: 1,
-	enabled: true
-};
-
-window.speechSynthesis.onvoiceschanged = function() {
-	const voices = window.speechSynthesis.getVoices();
-	window.TTS.voices = voices;
-	window.TTS.voice = voices[0];
-};
-
-function textToSpeech() {
-	// console.log("Text to read:\n" + window.TTS.new_text); // DEBUG
-	var text = new SpeechSynthesisUtterance(window.TTS.new_text);
-	text.voice = window.TTS.voice;
-	text.rate = window.TTS.rate;
-	text.pitch = window.TTS.pitch;
-	text.volume = window.TTS.volume;
-	if (window.speechSynthesis.speaking) { window.speechSynthesis.cancel() }
-	window.speechSynthesis.speak(text);
-}
+const API_AI_HORDE = 4;
 
 // Polyfill for piece of shit Chromium
 if (!(Symbol.asyncIterator in ReadableStream.prototype)) {
@@ -1489,6 +1487,8 @@
 		urlString = urlString.replace(/\/v1\/?$/, ""); // remove "/v1" from the end of the string
 	if (endpointAPI == API_KOBOLD_CPP)
 		urlString = urlString.replace(/\/api\/?$/, ""); // remove "/api" from the end of the string
+	if (endpointAPI == API_AI_HORDE)
+		urlString = "https://aihorde.net/api";
 	urlString = urlString.replace(/\/$/, ""); // remove "/" from the end of the string
 	return urlString;
 }
@@ -1500,7 +1500,7 @@
 		case API_LLAMA_CPP:
 			return await llamaCppTokenCount({ endpoint, endpointAPIKey, signal, ...options });
 		case API_KOBOLD_CPP:
-			return await koboldCppTokenCount({ endpoint, signal, ...options });
+			return await koboldCppTokenCount({ endpoint, endpointAPIKey, signal, ...options });
 		case API_OPENAI_COMPAT:
 			// These endpoints don't have a token count endpoint...
 			if (new URL(endpoint).host === 'api.openai.com' || new URL(endpoint).host === 'api.together.xyz')
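With API_AI_HORDE, normalizeEndpoint() ignores the user-supplied endpoint
entirely and pins the base URL to the public Horde API. A few illustrative
inputs and outputs (the sample URLs are hypothetical):

// Illustrative behavior of normalizeEndpoint(); sample URLs are made up.
// normalizeEndpoint("http://localhost:8080/v1/", API_LLAMA_CPP)
//   -> "http://localhost:8080"        ("/v1" and the trailing "/" stripped)
// normalizeEndpoint("http://localhost:5001/api", API_KOBOLD_CPP)
//   -> "http://localhost:5001"        ("/api" stripped)
// normalizeEndpoint("http://anything.example/", API_AI_HORDE)
//   -> "https://aihorde.net/api"      (input discarded, fixed base returned)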
@@ -1519,6 +1519,8 @@
 			if (tokenCount != -1)
 				return tokenCount;
 			return 0;
+		default:
+			return 0;
 	}
 }
 
@@ -1532,7 +1534,7 @@
 		case API_LLAMA_CPP:
 			return await llamaCppTokenize({ endpoint, endpointAPIKey, signal, ...options });
 		case API_KOBOLD_CPP:
-			return await koboldCppTokenize({ endpoint, signal, ...options });
+			return await koboldCppTokenize({ endpoint, endpointAPIKey, signal, ...options });
 		case API_OPENAI_COMPAT:
 			// These endpoints don't have a tokenenizer endpoint...
 			if (new URL(endpoint).host === 'api.openai.com' || new URL(endpoint).host === 'api.together.xyz')
@@ -1548,6 +1550,8 @@
 			if (tokens !== null)
 				return tokens;
 			return [];
+		default:
+			return [];
 	}
 }
 
@@ -1556,6 +1560,8 @@
 	switch (endpointAPI) {
 		case API_OPENAI_COMPAT:
 			return await openaiModels({ endpoint, endpointAPIKey, signal, ...options });
+		case API_AI_HORDE:
+			return await aiHordeModels({ endpoint, endpointAPIKey, signal, ...options });
 		default:
 			return [];
 	}
@@ -1567,9 +1573,19 @@
 		case API_LLAMA_CPP:
 			return yield* await llamaCppCompletion({ endpoint, endpointAPIKey, signal, ...options });
 		case API_KOBOLD_CPP:
-			return yield* await koboldCppCompletion({ endpoint, signal, ...options });
+			return yield* await koboldCppCompletion({ endpoint, endpointAPIKey, signal, ...options });
 		case API_OPENAI_COMPAT:
 			return yield* await openaiCompletion({ endpoint, endpointAPIKey, signal, ...options });
+		case API_AI_HORDE:
+			return yield* await aiHordeCompletion({ endpoint, endpointAPIKey, signal, ...options });
+	}
+}
+
+export async function* chatCompletion({ endpoint, endpointAPI, endpointAPIKey, signal, ...options }) {
+	endpoint = normalizeEndpoint(endpoint, endpointAPI);
+	switch (endpointAPI) {
+		case API_OPENAI_COMPAT:
+			return yield* await openaiChatCompletion({ endpoint, endpointAPIKey, signal, ...options });
+	}
+}
+
@@ -1580,6 +1596,8 @@
 			return await koboldCppAbortCompletion({ endpoint, ...options });
 		case API_OPENAI_COMPAT:
 			return await openaiOobaAbortCompletion({ endpoint, ...options });
+		case API_AI_HORDE:
+			return await aiHordeAbortCompletion({ endpoint, ...options });
 	}
 }
 
@@ -1623,6 +1641,9 @@
 				break;
 			}
 			const json = JSON.parse(data);
+			if (json.error?.message) {
+				throw new Error(json.error.message);
+			}
 			// Both Chrome and Firefox suck at debugging
 			// text/event-stream, so make it easier by logging events
 			if (window.logSSEEvents) {
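The json.error?.message check added above matters because some backends report
failures as a regular SSE data payload rather than an HTTP error status. A
minimal sketch of the pattern, assuming data holds the body of one decoded
"data:" line (parseStreamPayload is an illustrative name):

// Sketch: surface an in-stream error before treating the payload as a token.
// A failing backend may emit: data: {"error":{"message":"out of memory"}}
function parseStreamPayload(data) {
	const json = JSON.parse(data);
	if (json.error?.message) {
		throw new Error(json.error.message); // propagates to the caller's try/catch
	}
	return json;
}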
@@ -1709,57 +1730,54 @@
 		},
 		body: JSON.stringify({
 			...options,
-			stream: true,
 			cache_prompt: true,
 		}),
 		signal,
 	});
-	if (!res.ok)
+
+	if (!res.ok) {
 		throw new Error(`HTTP ${res.status}`);
-	window.TTS.new_prompt = options.prompt;
-	window.TTS.new_text = "";
-	for await (const chunk of parseEventStream(res.body)) {
-		const probs = chunk.completion_probabilities[0]?.probs ?? [];
-		const prob = probs.find(p => p.tok_str === chunk.content)?.prob;
-		window.TTS.new_text += chunk.content;
-		yield {
-			content: chunk.content,
-			...(probs.length > 0 ? {
-				prob: prob ?? -1,
-				completion_probabilities: chunk.completion_probabilities
-			} : {})
-		};
 	}
-	// Check if something has been added to the prompt, and read that too
-	if (window.TTS.prev_prompt != window.TTS.new_prompt) {
-		var prev = window.TTS.prev_prompt;
-		var next = window.TTS.new_prompt;
-		if (window.TTS.prev_prompt.length > 2000) { prev = window.TTS.prev_prompt.substr(1500) }
-		if (window.TTS.new_prompt.length > 2000) { next = window.TTS.new_prompt.substr(1500) }
-		window.TTS.prev_prompt = options.prompt + window.TTS.new_text;
-		// Find where the added/modified part begins
-		for (var c = 0; c < Math.min(prev.length, next.length); c++) {
-			if (prev[c] != next[c]) {
-				break;
-			}
-		}
-		if (c < next.length) {
-			window.TTS.new_text = next.substr(c) + window.TTS.new_text;
+
+	async function* yieldTokens(chunks) {
+		for await (const chunk of chunks) {
+			const token = chunk.content || chunk.token;
+			const choice = chunk.completion_probabilities?.[0];
+
+			const probs = choice?.probs ??
+				Object.values(choice?.top_logprobs || chunk.top_logprobs || {}).map(({ token, logprob }) => ({
+					tok_str: token,
+					prob: Math.exp(logprob)
+				}));
+			const prob = probs.find(p => p.tok_str === token)?.prob;
+
+			yield {
+				content: token,
+				...(probs.length > 0 ? {
+					prob: prob ?? -1,
+					completion_probabilities: [{
+						content: token,
+						probs
+					}]
+				} : {})
+			};
 		}
+	}
+
+	if (options.stream) {
+		yield* await yieldTokens(parseEventStream(res.body));
 	} else {
-		window.TTS.prev_prompt = options.prompt + window.TTS.new_text;
+		const { completion_probabilities } = await res.json();
+		yield* await yieldTokens(completion_probabilities);
 	}
-	// Trim if string is too long
-	if (window.TTS.new_text.length > 500) { window.TTS.new_text = window.TTS.new_text.substr(-500) }
-	// Read out TTS.new_text
-	if (window.TTS.enabled) { setTimeout(textToSpeech, 20) }
 }
 
-async function koboldCppTokenCount({ endpoint, proxyEndpoint, signal, ...options }) {
+async function koboldCppTokenCount({ endpoint, endpointAPIKey, proxyEndpoint, signal, ...options }) {
 	const res = await fetch(`${proxyEndpoint ?? endpoint}/api/extra/tokencount`, {
 		method: 'POST',
 		headers: {
 			'Content-Type': 'application/json',
+			...(endpointAPIKey ? { 'Authorization': `Bearer ${endpointAPIKey}` } : {}),
 			...(proxyEndpoint ? { 'X-Real-URL': endpoint } : {})
 		},
 		body: JSON.stringify({
@@ -1773,11 +1791,12 @@
 	return value;
 }
 
-async function koboldCppTokenize({ endpoint, proxyEndpoint, signal, ...options }) {
+async function koboldCppTokenize({ endpoint, endpointAPIKey, proxyEndpoint, signal, ...options }) {
 	const res = await fetch(`${proxyEndpoint ?? endpoint}/api/extra/tokencount`, {
 		method: 'POST',
 		headers: {
 			'Content-Type': 'application/json',
+			...(endpointAPIKey ? { 'Authorization': `Bearer ${endpointAPIKey}` } : {}),
 			...(proxyEndpoint ? { 'X-Real-URL': endpoint } : {})
 		},
 		body: JSON.stringify({
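Both KoboldCpp helpers above post to the same /api/extra/tokencount route: the
count helper reads the value field, while the tokenizer helper relies on the
same response also carrying the token ids. A hedged usage sketch (the ids
field is assumed from how the helper above consumes this route; koboldTokenInfo
is an illustrative name):

// Sketch: one tokencount request serving both counting and tokenization.
async function koboldTokenInfo(endpoint, prompt) {
	const res = await fetch(`${endpoint}/api/extra/tokencount`, {
		method: 'POST',
		headers: { 'Content-Type': 'application/json' },
		body: JSON.stringify({ prompt }),
	});
	const { value, ids } = await res.json(); // value: token count; ids: token ids (assumed)
	return { count: value, tokens: ids ?? [] };
}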
@@ -1793,7 +1812,8 @@
 }
 
-function koboldCppConvertOptions(options) {
+function koboldCppConvertOptions(options, endpoint) {
+	const isHorde = endpoint.toLowerCase().includes("aihorde.net");
 	const swapOption = (lhs, rhs) => {
 		if (lhs in options) {
 			options[rhs] = options[lhs];
@@ -1801,10 +1821,14 @@
 		}
 	};
 	if (options.n_predict === -1) {
-		options.n_predict = 1024;
+		options.n_predict = isHorde ? 512 : 1024;
+	}
+	if (options.n_predict < 16 && isHorde) {
+		options.n_predict = 16;
 	}
 	swapOption("n_ctx", "max_context_length");
 	swapOption("n_predict", "max_length");
+	swapOption("n_probs", "logprobs");
 	swapOption("repeat_penalty", "rep_pen");
 	swapOption("repeat_last_n", "rep_pen_range");
 	swapOption("tfs_z", "tfs");
@@ -1815,23 +1839,52 @@
 	return options;
 }
 
-async function* koboldCppCompletion({ endpoint, proxyEndpoint, signal, ...options }) {
-	const res = await fetch(`${proxyEndpoint ?? endpoint}/api/extra/generate/stream`, {
+async function* koboldCppCompletion({ endpoint, endpointAPIKey, proxyEndpoint, signal, ...options }) {
+	const res = await fetch(`${proxyEndpoint ?? endpoint}/api/${options.stream ? 'extra/generate/stream' : 'v1/generate'}`, {
 		method: 'POST',
 		headers: {
 			'Content-Type': 'application/json',
+			...(endpointAPIKey ? { 'Authorization': `Bearer ${endpointAPIKey}` } : {}),
 			...(proxyEndpoint ? { 'X-Real-URL': endpoint } : {})
 		},
 		body: JSON.stringify({
-			...koboldCppConvertOptions(options),
-			stream: true,
+			...koboldCppConvertOptions(options, endpoint)
 		}),
 		signal,
 	});
-	if (!res.ok)
+
+	if (!res.ok) {
 		throw new Error(`HTTP ${res.status}`);
-	for await (const chunk of parseEventStream(res.body)) {
-		yield { content: chunk.token };
+	}
+
+	async function* yieldTokens(chunks) {
+		for await (const chunk of chunks) {
+			const { token, top_logprobs } = chunk;
+
+			const probs = Object.values(top_logprobs ?? {}).map(({ token, logprob }) => ({
+				tok_str: token,
+				prob: Math.exp(logprob)
+			}));
+			const prob = probs.find(p => p.tok_str === token)?.prob;
+
+			yield {
+				content: token,
+				...(probs.length > 0 ? {
+					prob: prob ?? -1,
+					completion_probabilities: [{
+						content: token,
+						probs
+					}]
+				} : {})
+			};
+		}
+	}
+
+	if (options.stream) {
+		yield* await yieldTokens(parseEventStream(res.body));
+	} else {
+		const { results } = await res.json();
+		yield* await yieldTokens(results?.[0]?.logprobs?.content ?? []);
 	}
 }
@@ -2053,9 +2106,10 @@
 	return data.map(item => item.id);
 }
 
-function openaiConvertOptions(options, endpoint){
+function openaiConvertOptions(options, endpoint, isChat) {
 	const isOpenAI = endpoint.toLowerCase().includes("openai.com");
 	const isTogetherAI = endpoint.toLowerCase().includes("together.xyz");
+	const isOpenRouter = endpoint.toLowerCase().includes("openrouter.ai");
 	const swapOption = (lhs, rhs) => {
 		if (lhs in options) {
 			options[rhs] = options[lhs];
@@ -2083,7 +2137,12 @@
 	}
 	swapOption("n_ctx", "max_context_length");
 	swapOption("n_predict", "max_tokens");
-	swapOption("n_probs", "logprobs");
+	if (isChat) {
+		options.logprobs = true;
+		swapOption("n_probs", "top_logprobs");
+	} else {
+		swapOption("n_probs", "logprobs");
+	}
 	swapOption("repeat_penalty", "repetition_penalty");
 	swapOption("repeat_last_n", "repetition_penalty_range");
 	swapOption("tfs_z", "tfs");
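The new isChat flag exists because the two OpenAI-style routes spell their
log-probability options differently: text completion takes an integer logprobs,
while chat completion takes a boolean logprobs plus an integer top_logprobs.
Illustrative request bodies (values are examples only):

// Text completion (/v1/completions): logprobs is an integer count.
//   { "prompt": "...", "max_tokens": 256, "logprobs": 10 }
//
// Chat completion (/v1/chat/completions): logprobs becomes a boolean and the
// count moves to top_logprobs.
//   { "messages": [...], "max_tokens": 256, "logprobs": true, "top_logprobs": 10 }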
@@ -2102,40 +2161,184 @@
 			...(proxyEndpoint ? { 'X-Real-URL': endpoint } : {})
 		},
 		body: JSON.stringify({
-			...openaiConvertOptions(options, endpoint),
-			stream: true,
+			...openaiConvertOptions(options, endpoint)
 		}),
 		signal,
 	});
-	if (!res.ok)
+
+	if (!res.ok) {
+		let json;
+		try {
+			json = await res.json();
+		} catch {}
+		if (json?.error?.message) {
+			throw new Error(json.error.message);
+		}
 		throw new Error(`HTTP ${res.status}`);
-	for await (const chunk of parseEventStream(res.body)) {
-		if (!chunk.choices || chunk.choices.length === 0) {
-			continue; // Skip if there are no choices (should never happen)
+	}
+
+	async function* yieldTokens(chunks) {
+		for await (const chunk of chunks) {
+			if (!chunk.choices || chunk.choices.length === 0) {
+				if (chunk.content) yield { content: chunk.content };
+				continue;
+			}
+
+			const { text, logprobs } = chunk.choices[0];
+			const top_logprobs = logprobs?.top_logprobs?.[0] ?? {};
+
+			const probs = Object.entries(top_logprobs).map(([tok, logprob]) => ({
+				tok_str: tok,
+				prob: Math.exp(logprob)
+			}));
+			const prob = probs.find(p => p.tok_str === text)?.prob;
+
+			yield {
+				content: text,
+				...(probs.length > 0 ? {
+					prob: prob ?? -1,
+					completion_probabilities: [{
+						content: text,
+						probs
+					}]
+				} : {})
+			};
 		}
+	}
 
-		const { text, logprobs } = chunk.choices[0];
-		const top_logprobs = logprobs?.top_logprobs?.[0] ?? {};
-
-		const probs = Object.entries(top_logprobs).map(([tok, logprob]) => ({
-			tok_str: tok,
-			prob: Math.exp(logprob)
-		}));
-		const prob = probs.find(p => p.tok_str === text)?.prob;
+	if (options.stream) {
+		yield* await yieldTokens(parseEventStream(res.body));
+	} else {
+		const { content, choices } = await res.json();
+		if (choices?.[0]?.logprobs?.tokens) {
+			const logprobs = choices[0].logprobs;
+			const chunks = Object.values(logprobs.tokens).map((token, i) => ({
+				choices: [{
+					text: token,
+					logprobs: { top_logprobs: [logprobs.top_logprobs[i]] }
+				}]
+			}));
+			yield* await yieldTokens(chunks);
+		} else if (choices?.[0]?.text) {
+			yield { content: choices[0].text };
+		} else if (content) { // llama.cpp specific?
+			yield { content };
+		}
+	}
+}
+
+async function* openaiBufferUtf8Stream(stream) {
+	const decoder = new TextDecoder('utf-8', { fatal: false });
+
+	function parseEscapedString(escapedStr) {
+		return new Uint8Array(
+			escapedStr
+				.split('\\x')
+				.slice(1)
+				.map(hex => parseInt(hex, 16))
+		);
+	}
+
+	const hasEscapedSequence = str => /\\x[0-9a-fA-F]{2}/.test(str);
+	const encoder = new TextEncoder();
+
+	for await (const chunk of stream) {
+		const content = chunk?.choices?.[0]?.delta?.content ?? chunk?.choices?.[0]?.text;
+
+		if (!content) {
+			yield chunk;
+			continue;
+		}
+
+		const binaryData = hasEscapedSequence(content)
+			? parseEscapedString(content)
+			: encoder.encode(content);
+
+		const decoded = decoder.decode(binaryData, { stream: true });
 		yield {
-			content: text,
-			...(probs.length > 0 ? {
-				prob: prob ?? -1,
-				completion_probabilities: [{
-					content: text,
-					probs
-				}]
-			} : {})
+			...chunk,
+			choices: [{
+				...chunk.choices[0],
+				...(chunk.choices[0].delta
+					? { delta: { ...chunk.choices[0].delta, content: decoded } }
+					: { text: decoded }
+				)
+			}]
 		};
 	}
 }
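openaiBufferUtf8Stream() leans on TextDecoder's streaming mode: with
{ stream: true } the decoder holds back an incomplete multi-byte sequence until
a later chunk completes it, so tokens that split a UTF-8 character keep
decoding cleanly. A self-contained sketch of that behavior:

// Sketch: stream mode buffers a partial UTF-8 sequence across chunks.
const decoder = new TextDecoder('utf-8', { fatal: false });
const bytes = new TextEncoder().encode('あ'); // 0xE3 0x81 0x82
console.log(decoder.decode(bytes.slice(0, 2), { stream: true })); // "" (held back)
console.log(decoder.decode(bytes.slice(2), { stream: true }));    // "あ"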
+
+async function* openaiChatCompletion({ endpoint, endpointAPIKey, proxyEndpoint, signal, ...options }) {
+	const res = await fetch(`${proxyEndpoint ?? endpoint}/v1/chat/completions`, {
+		method: 'POST',
+		headers: {
+			'Content-Type': 'application/json',
+			'Authorization': `Bearer ${endpointAPIKey}`,
+			...(proxyEndpoint ? { 'X-Real-URL': endpoint } : {})
+		},
+		body: JSON.stringify({
+			...openaiConvertOptions(options, endpoint, true)
+		}),
+		signal,
+	});
+
+	if (!res.ok) {
+		let json;
+		try {
+			json = await res.json();
+		} catch {}
+		if (json?.error?.message) {
+			throw new Error(json.error.message);
+		}
+		throw new Error(`HTTP ${res.status}`);
+	}
+
+	async function* yieldTokens(chunks) {
+		for await (const chunk of chunks) {
+			const token = chunk.choices?.[0]?.delta?.content;
+			const top_logprobs = chunk.choices?.[0]?.logprobs?.content?.[0]?.top_logprobs ?? {};
+			if (!token) continue;
+
+			const probs = Object.values(top_logprobs).map(({ token, logprob }) => ({
+				tok_str: token,
+				prob: Math.exp(logprob)
+			}));
+			const prob = probs.find(p => p.tok_str === token)?.prob;
+
+			yield {
+				content: token,
+				...(probs.length > 0 ? {
+					prob: prob ?? -1,
+					completion_probabilities: [{
+						content: token,
+						probs
+					}]
+				} : {})
+			};
		}
+	}
+
+	if (options.stream) {
+		yield* await yieldTokens(parseEventStream(res.body));
+	} else {
+		const { choices } = await res.json();
+		const chunks = choices?.[0]?.logprobs?.content;
+
+		if (chunks?.length) {
+			const formattedChunks = chunks.map(chunk => ({
+				choices: [{
+					delta: { content: chunk.token },
+					logprobs: { content: [{ top_logprobs: chunk.top_logprobs }] }
+				}]
+			}));
+			yield* await yieldTokens(openaiBufferUtf8Stream(formattedChunks));
+		} else if (choices?.[0]?.message?.content) {
+			yield { content: choices[0].message.content };
+		}
+	}
+}
+
 async function openaiOobaAbortCompletion({ endpoint, proxyEndpoint, ...options }) {
 	try {
 		await fetch(`${proxyEndpoint ?? endpoint}/v1/internal/stop-generation`, {
@@ -2149,6 +2352,88 @@
 		}
 	}
 }
+async function aiHordeModels({ endpoint, endpointAPIKey, proxyEndpoint, signal, ...options }) {
+	const res = await fetch(`${proxyEndpoint ?? endpoint}/v2/status/models?type=text`, {
+		method: 'GET',
+		headers: {
+			'Content-Type': 'application/json',
+			...(proxyEndpoint ? { 'X-Real-URL': endpoint } : {})
+		},
+		signal,
+	});
+
+	if (!res.ok)
+		throw new Error(`HTTP ${res.status}`);
+
+	const response = await res.json();
+
+	return response
+		.filter(model => model.type === "text")
+		.map(model => model.name);
+}
+
+async function* aiHordeCompletion({ endpoint, endpointAPIKey, proxyEndpoint, signal, ...options }) {
+	const { model, prompt, ...params } = options;
+	const submitRes = await fetch(`${proxyEndpoint ?? endpoint}/v2/generate/text/async`, {
+		method: 'POST',
+		headers: {
+			'Content-Type': 'application/json',
+			'Apikey': endpointAPIKey?.trim() ? endpointAPIKey : '0000000000',
+			...(proxyEndpoint ? { 'X-Real-URL': endpoint } : {})
+		},
+		body: JSON.stringify({
+			...(model ? { models: [model] } : {}),
+			params: { ...koboldCppConvertOptions(params, endpoint) },
+			prompt: prompt
+		}),
+		signal,
+	});
+	if (!submitRes.ok)
+		throw new Error(`HTTP ${submitRes.status}`);
+	const { id: taskId } = await submitRes.json();
+
+	yield { status: 'queue_init', taskId: taskId };
+
+	// Poll for results
+	while (true) {
+		const checkRes = await fetch(`${proxyEndpoint ?? endpoint}/v2/generate/text/status/${taskId}`, {
+			headers: {
+				...(proxyEndpoint ? { 'X-Real-URL': endpoint } : {})
+			},
+			signal,
+		});
+
+		if (!checkRes.ok)
+			throw new Error(`HTTP ${checkRes.status}`);
+		const status = await checkRes.json();
+
+		yield { status: 'queue_status', position: status.queue_position, waitTime: status.wait_time, processing: status.processing };
+
+		if (status.done) {
+			if (status.generations && status.generations.length > 0) {
+				yield { status: 'done', content: status.generations[0].text };
+			}
+			break;
+		}
+
+		// Wait before polling again
+		await new Promise(resolve => setTimeout(resolve, 1000));
+	}
+}
+
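Unlike the streaming backends, aiHordeCompletion() yields queue-status objects
before any text, and the generated text arrives in a single 'done' event rather
than token by token, so callers have to branch on the status field. A
consumption sketch (the event shapes match the generator above; the handling
itself is illustrative):

// Sketch: consuming the Horde generator's status events.
let hordeTaskId;
for await (const event of aiHordeCompletion({ endpoint, endpointAPIKey, prompt })) {
	if (event.status === 'queue_init') {
		hordeTaskId = event.taskId; // keep so aiHordeAbortCompletion can cancel the task
	} else if (event.status === 'queue_status') {
		console.log(`queue ${event.position}, ~${event.waitTime}s, processing: ${event.processing}`);
	} else if (event.status === 'done') {
		console.log(event.content); // the whole generation arrives at once
	}
}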
+async function aiHordeAbortCompletion({ endpoint, proxyEndpoint, hordeTaskId, ...options }) {
+	try {
+		await fetch(`${proxyEndpoint ?? endpoint}/v2/generate/text/status/${hordeTaskId}`, {
+			method: 'DELETE',
+			headers: {
+				...(proxyEndpoint ? { 'X-Real-URL': endpoint } : {})
+			},
+		});
+	} catch (e) {
+		reportError(e);
+	}
+}
+
 function importSillyTavernWorldInfo(json, setWorldInfo, importBehavior) {
 	setWorldInfo(prevWorldInfo => {
 		let updatedEntries;
@@ -2282,7 +2567,7 @@
 
 function Checkbox({ label, value, hidden, onValueChange, ...props }) {
 	return html`
-