initial commit

ajxv · Nov 4, 2024 · f94801e · f94801e
commit f94801e
Show file tree

Hide file tree

Showing 4 changed files with 386 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,162 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
diff --git a/README.md b/README.md
@@ -0,0 +1 @@
+# Real Time Speech To Text (Using OpenAI Whisper)
diff --git a/app.py b/app.py
@@ -0,0 +1,95 @@
+# app.py
+from flask import Flask, render_template
+from flask_socketio import SocketIO
+import sounddevice as sd
+import numpy as np
+import threading
+import queue
+import whisper
+import time
+
+# Flask application and the Socket.IO server wrapped around it.
+app = Flask(__name__)
+socketio = SocketIO(app)
+
+# Load Whisper model
+# Runs at import time, so the "small" model is loaded before the server starts.
+model = whisper.load_model("small")
+
+# Audio recording parameters
+SAMPLE_RATE = 16000  # passed to sd.InputStream as samplerate
+CHANNELS = 1  # passed to sd.InputStream as channels
+CHUNK_DURATION = 1  # presumably seconds; only used to derive CHUNK_SIZE
+CHUNK_SIZE = int(SAMPLE_RATE * CHUNK_DURATION)  # not referenced elsewhere in this file
+ENERGY_THRESHOLD = 0.002  # transcribe_audio skips audio whose mean absolute amplitude is not above this
+
+# Set up the audio queue
+# audio_callback puts captured buffers here; transcribe_audio drains them.
+audio_queue = queue.Queue()
+# Shared flag set by start_recording / cleared by stop_recording.
+is_recording = False
+# Thread running transcribe_audio; assigned in start_recording.
+recording_thread = None
+
+def audio_callback(indata, frames, time, status):
+    """Callback function to handle incoming audio data."""
+    if status:
+        print(f"Status: {status}")
+    if is_recording:
+        audio_queue.put(indata.copy().flatten().astype(np.float32))
+
+def transcribe_audio():
+    """Process audio from queue and transcribe using Whisper."""
+    last_transcription_time = 0
+    min_gap = 0.75
+
+    while is_recording:
+        if not audio_queue.empty():
+            audio_data = []
+            while not audio_queue.empty():
+                audio_data.append(audio_queue.get())
+
+            audio_chunk = np.concatenate(audio_data, axis=0).astype(np.float32)
+
+            if (np.mean(np.abs(audio_chunk)) > ENERGY_THRESHOLD and 
+                (time.time() - last_transcription_time) > min_gap):
+
+                last_transcription_time = time.time()
+                result = model.transcribe(audio_chunk, language='en', without_timestamps=True)
+                transcribed_text = result['text'].strip()
+
+                if transcribed_text:
+                    # Emit the transcription to connected clients
+                    socketio.emit('transcription', {'text': transcribed_text})
+
+def start_recording():
+    global is_recording, recording_thread
+    if not is_recording:
+        is_recording = True
+        recording_thread = threading.Thread(target=transcribe_audio, daemon=True)
+        recording_thread.start()
+
+        # Start the audio stream
+        sd.InputStream(
+            channels=CHANNELS,
+            samplerate=SAMPLE_RATE,
+            callback=audio_callback
+        ).start()
+
+def stop_recording():
+    global is_recording
+    is_recording = False
+    if recording_thread:
+        recording_thread.join()
+
+@app.route('/')
+def index():
+    return render_template('index.html')
+
+@socketio.on('start_recording')
+def handle_start_recording():
+    start_recording()
+    return {'status': 'success', 'message': 'Recording started'}
+
+@socketio.on('stop_recording')
+def handle_stop_recording():
+    stop_recording()
+    return {'status': 'success', 'message': 'Recording stopped'}
+
+# Start the Socket.IO development server when the file is run as a script.
+if __name__ == '__main__':
+    # NOTE(review): debug=True is presumably meant for local development only;
+    # confirm it is disabled before exposing this server.
+    socketio.run(app, debug=True)
diff --git a/templates/index.html b/templates/index.html
@@ -0,0 +1,128 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Real-time Speech Transcription</title>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.0.1/socket.io.js"></script>
+    <style>
+        body {
+            font-family: Arial, sans-serif;
+            max-width: 800px;
+            margin: 0 auto;
+            padding: 20px;
+        }
+
+        .container {
+            background-color: #f5f5f5;
+            border-radius: 8px;
+            padding: 20px;
+            margin-top: 20px;
+        }
+
+        #transcription-box {
+            min-height: 200px;
+            max-height: 400px;
+            overflow-y: auto;
+            background-color: white;
+            border: 1px solid #ddd;
+            border-radius: 4px;
+            padding: 15px;
+            margin: 20px 0;
+        }
+
+        .controls {
+            display: flex;
+            gap: 10px;
+            margin-bottom: 20px;
+        }
+
+        button {
+            padding: 10px 20px;
+            border: none;
+            border-radius: 4px;
+            cursor: pointer;
+            font-size: 16px;
+            transition: background-color 0.3s;
+        }
+
+        #startBtn {
+            background-color: #4CAF50;
+            color: white;
+        }
+
+        #stopBtn {
+            background-color: #f44336;
+            color: white;
+        }
+
+        button:hover {
+            opacity: 0.9;
+        }
+
+        button:disabled {
+            background-color: #cccccc;
+            cursor: not-allowed;
+        }
+
+        .status {
+            margin-top: 10px;
+            font-style: italic;
+            color: #666;
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <h1>Real-time Speech Transcription</h1>
+
+        <div class="controls">
+            <button id="startBtn">Start Recording</button>
+            <button id="stopBtn" disabled>Stop Recording</button>
+        </div>
+
+        <div class="status" id="status">Status: Ready</div>
+
+        <div id="transcription-box"></div>
+    </div>
+
+    <script>
+        const socket = io();
+        const startBtn = document.getElementById('startBtn');
+        const stopBtn = document.getElementById('stopBtn');
+        const status = document.getElementById('status');
+        const transcriptionBox = document.getElementById('transcription-box');
+
+        startBtn.addEventListener('click', () => {
+            socket.emit('start_recording');
+            startBtn.disabled = true;
+            stopBtn.disabled = false;
+            status.textContent = 'Status: Recording...';
+        });
+
+        stopBtn.addEventListener('click', () => {
+            socket.emit('stop_recording');
+            startBtn.disabled = false;
+            stopBtn.disabled = true;
+            status.textContent = 'Status: Stopped';
+        });
+
+        socket.on('transcription', (data) => {
+            const p = document.createElement('p');
+            p.textContent = data.text;
+            transcriptionBox.appendChild(p);
+            transcriptionBox.scrollTop = transcriptionBox.scrollHeight;
+        });
+
+        socket.on('connect', () => {
+            status.textContent = 'Status: Connected';
+        });
+
+        socket.on('disconnect', () => {
+            status.textContent = 'Status: Disconnected';
+            startBtn.disabled = false;
+            stopBtn.disabled = true;
+        });
+    </script>
+</body>
+</html>
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		# Real Time Speech To Text (Using OpenAI Whisper)