Add tts-to-webrtc example #302

Open · wants to merge 1 commit into master
79 changes: 79 additions & 0 deletions tts-to-webrtc/README.md
@@ -0,0 +1,79 @@
# WebRTC Text-to-Speech Example

This example app combines WebRTC with OpenAI's Text-to-Speech API to stream synthesized audio to the browser in real time.

## Prerequisites

- Go 1.20 or later
- An OpenAI API key
- Web browser with WebRTC support (Chrome, Firefox, Safari, etc.)

## Installation

1. Clone the repository:
```bash
git clone https://github.com/pion/example-webrtc-applications
cd example-webrtc-applications/tts-to-webrtc
```

2. Install module dependencies:

The [resampler](https://github.com/dh1tw/gosamplerate) and [Opus encoder](https://github.com/hraban/opus) packages use cgo and require native libraries to be installed. Follow the instructions below to install them.

Linux (using apt on Ubuntu, yum on CentOS, etc.):
```bash
$ sudo apt install libsamplerate0 pkg-config libopus-dev libopusfile-dev
```

macOS (using Homebrew):
```bash
$ brew install libsamplerate pkg-config opus opusfile
```

3. Install Go dependencies:
```bash
go install github.com/pion/example-webrtc-applications/v4/tts-to-webrtc@latest
```

## Configuration

Set your OpenAI API key as an environment variable:

```bash
export OPENAI_API_KEY=your_api_key_here
```

## Running the Application

1. Start the server:
```bash
go run main.go
```

2. Open your web browser and navigate to:
```
http://localhost:8080
```

## Usage

1. Click the "Connect" button to establish a WebRTC connection
2. Wait for the connection status to show "connected"
3. Type some text in the textarea
4. Click "Convert to Speech" to hear the text being spoken

## Technical Details

- The application uses OpenAI's TTS API to convert text to speech
- Audio is streamed using WebRTC with Opus codec
- Sample rate conversion is handled automatically (24kHz to 48kHz)
- The server implements a simple audio buffer to handle streaming
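The example itself performs the 24 kHz to 48 kHz conversion with the cgo-based gosamplerate bindings. Purely to illustrate the idea of that step, here is a dependency-free linear-interpolation sketch (not the code this example ships; real resamplers use band-limited filters):

```go
package main

import "fmt"

// upsample2x doubles the sample rate of a mono PCM signal
// (e.g. 24 kHz -> 48 kHz) by linear interpolation: each output
// pair is the original sample followed by the midpoint between
// it and the next sample. The final sample is simply repeated.
func upsample2x(in []int16) []int16 {
	if len(in) == 0 {
		return nil
	}
	out := make([]int16, 0, len(in)*2)
	for i, s := range in {
		out = append(out, s)
		next := s
		if i+1 < len(in) {
			next = in[i+1]
		}
		out = append(out, int16((int32(s)+int32(next))/2))
	}
	return out
}

func main() {
	fmt.Println(upsample2x([]int16{0, 100, 200})) // prints [0 50 100 150 200 200]
}
```

A library such as libsamplerate is preferable in practice because naive interpolation introduces aliasing; the exact 2x ratio here just keeps the sketch simple.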



## License

This project is licensed under the MIT License - see the LICENSE file for details.
172 changes: 172 additions & 0 deletions tts-to-webrtc/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
<!DOCTYPE html>
<html>
<head>
<title>WebRTC TTS Demo</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<style>
body {
font-family: Arial, sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 20px;
}
.form-group {
margin-bottom: 15px;
}
textarea {
width: 100%;
padding: 8px;
}
button {
padding: 10px 20px;
background-color: #4CAF50;
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
}
button:hover {
background-color: #45a049;
}
button.connected {
background-color: #dc3545;
}
button.connected:hover {
background-color: #c82333;
}
</style>
</head>
<body>
<h1>Text to Speech with WebRTC</h1>
<div id="connectionStatus" style="margin-bottom: 20px; padding: 10px; background-color: #f0f0f0; border-radius: 4px;">
<div>Connection State: <span id="connectionState">new</span></div>
<div>ICE Connection: <span id="iceConnectionState">new</span></div>
<div>Signaling State: <span id="signalingState">new</span></div>
</div>
<div class="form-group">
<button id="connectButton" onclick="toggleConnection()">Connect</button>
</div>
<div class="form-group">
<textarea id="textInput" rows="4" placeholder="Enter text to convert to speech"></textarea>
</div>
<div class="form-group">
<button id="convertButton" onclick="submitText()" disabled>Convert to Speech</button>
</div>

<script>
let pc;
let isConnected = false;

async function toggleConnection() {
const connectButton = document.getElementById('connectButton');
if (!isConnected) {
connectButton.disabled = true;
await initWebRTC();
} else {
if (pc) {
pc.close(); // close() returns void, so there is nothing to await
pc = null;
}
document.getElementById('convertButton').disabled = true;
document.getElementById('connectionState').textContent = 'new';
document.getElementById('iceConnectionState').textContent = 'new';
document.getElementById('signalingState').textContent = 'new';
connectButton.textContent = 'Connect';
connectButton.classList.remove('connected');
isConnected = false;
}
}

async function initWebRTC() {
pc = new RTCPeerConnection({
iceServers: [{
urls: 'stun:stun.l.google.com:19302'
}]
});

// Add connection state monitoring
pc.onconnectionstatechange = () => {
const state = pc.connectionState;
const connectButton = document.getElementById('connectButton');
document.getElementById('connectionState').textContent = state;
if (state === 'connected') {
document.getElementById('convertButton').disabled = false;
connectButton.disabled = false;
connectButton.textContent = 'Disconnect';
connectButton.classList.add('connected');
isConnected = true;
} else {
document.getElementById('convertButton').disabled = true;
}
};

pc.oniceconnectionstatechange = () => {
document.getElementById('iceConnectionState').textContent = pc.iceConnectionState;
};

pc.onsignalingstatechange = () => {
document.getElementById('signalingState').textContent = pc.signalingState;
};

pc.ontrack = function(event) {
const audio = new Audio();
audio.srcObject = event.streams[0];
// play() returns a promise; surface autoplay failures instead of leaving an unhandled rejection
audio.play().catch(err => console.error('Audio playback failed:', err));
};

// Create promise to wait for ICE gathering
const iceCandidatesComplete = new Promise((resolve) => {
pc.onicegatheringstatechange = () => {
if (pc.iceGatheringState === 'complete') {
resolve();
}
};
});

pc.addTransceiver('audio', { direction: 'recvonly' });

// addTransceiver above already requests audio, so the legacy offerToReceiveAudio option is unnecessary
const offer = await pc.createOffer();
await pc.setLocalDescription(offer);

// Wait for ICE gathering to complete
await iceCandidatesComplete;

const response = await fetch('/webrtc', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({
sdp: JSON.stringify(pc.localDescription)
})
});

const answer = await response.json();
await pc.setRemoteDescription(new RTCSessionDescription(answer));
}

async function submitText() {
const text = document.getElementById('textInput').value;
if (!text) return;

const response = await fetch('/tts', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({
text: text
})
});

const audioBlob = await response.blob();
const audio = new Audio(URL.createObjectURL(audioBlob));
audio.play().catch(err => console.error('Audio playback failed:', err));
}

</script>
</body>
</html>