Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
ajxv committed Nov 4, 2024
0 parents commit f94801e
Show file tree
Hide file tree
Showing 4 changed files with 386 additions and 0 deletions.
162 changes: 162 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Real-Time Speech-to-Text (Using OpenAI Whisper)
95 changes: 95 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# app.py
from flask import Flask, render_template
from flask_socketio import SocketIO
import sounddevice as sd
import numpy as np
import threading
import queue
import whisper
import time

# Flask application and the Socket.IO server wrapped around it; the Socket.IO
# instance is what pushes 'transcription' events to connected clients.
app = Flask(__name__)
socketio = SocketIO(app)

# Load Whisper model
# Loaded once at import time and shared by the transcription worker thread.
model = whisper.load_model("small")

# Audio recording parameters
# SAMPLE_RATE and CHANNELS are passed to sd.InputStream in start_recording().
# NOTE(review): 16000 presumably matches the sample rate Whisper expects — confirm.
SAMPLE_RATE = 16000
CHANNELS = 1
# CHUNK_DURATION / CHUNK_SIZE are not referenced by any other code visible in
# this file.
CHUNK_DURATION = 1
CHUNK_SIZE = int(SAMPLE_RATE * CHUNK_DURATION)
# A drained chunk is only transcribed when its mean absolute amplitude exceeds
# this value (see transcribe_audio()).
ENERGY_THRESHOLD = 0.002

# Set up the audio queue
# audio_callback() puts captured sample arrays on this queue; transcribe_audio()
# takes them off.
audio_queue = queue.Queue()
# Shared flag: gates enqueuing in audio_callback() and keeps the
# transcribe_audio() loop alive. Toggled by start_recording()/stop_recording().
is_recording = False
# The worker thread created by start_recording(), or None before the first start.
recording_thread = None

def audio_callback(indata, frames, time, status):
    """Receive one block of captured audio from the input stream.

    Registered as the ``callback`` of the ``sd.InputStream`` opened in
    ``start_recording``. A truthy ``status`` is printed; ``frames`` and
    ``time`` are accepted for the callback signature but not used. While the
    module-level ``is_recording`` flag is set, the block is copied, flattened
    to one dimension, converted to ``float32`` and placed on ``audio_queue``.
    """
    if status:
        print(f"Status: {status}")

    # Nothing is enqueued unless a recording session is active.
    if not is_recording:
        return

    samples = indata.copy().flatten().astype(np.float32)
    audio_queue.put(samples)

def transcribe_audio():
    """Take captured audio off the queue and transcribe it with Whisper.

    Intended to run on the worker thread created by ``start_recording``; the
    loop exits once the module-level ``is_recording`` flag is cleared.

    Each pass waits (with a short timeout) for audio on ``audio_queue``,
    gathers everything already queued, and — once more than ``min_gap``
    seconds have passed since the previous transcription started — joins the
    buffered samples into one ``float32`` array. If that array's mean absolute
    amplitude exceeds ``ENERGY_THRESHOLD`` it is passed to
    ``model.transcribe``; any non-empty text is emitted to clients as a
    ``'transcription'`` Socket.IO event with payload ``{'text': ...}``.

    Audio that arrives before ``min_gap`` has elapsed is kept and included in
    the next transcription rather than dropped. Quiet chunks are discarded.
    """
    # How long to block waiting for audio before re-checking is_recording, so
    # the thread sleeps while idle yet still stops promptly.
    poll_interval = 0.1
    min_gap = 0.75
    last_transcription_time = 0
    pending = []  # sample arrays received but not yet evaluated

    while is_recording:
        # Block for the next audio block instead of spinning on empty().
        try:
            pending.append(audio_queue.get(timeout=poll_interval))
        except queue.Empty:
            continue

        # Pick up anything else that is already waiting.
        while True:
            try:
                pending.append(audio_queue.get_nowait())
            except queue.Empty:
                break

        # Too soon after the last transcription: keep buffering.
        if (time.time() - last_transcription_time) <= min_gap:
            continue

        audio_chunk = np.concatenate(pending, axis=0).astype(np.float32)
        pending = []

        # Skip chunks that are too quiet to be worth transcribing.
        if np.mean(np.abs(audio_chunk)) <= ENERGY_THRESHOLD:
            continue

        last_transcription_time = time.time()
        result = model.transcribe(audio_chunk, language='en', without_timestamps=True)
        transcribed_text = result['text'].strip()

        if transcribed_text:
            # Emit the transcription to connected clients
            socketio.emit('transcription', {'text': transcribed_text})

# The currently open input stream, or None when not recording. Kept at module
# level so the stream stays referenced while running and can be shut down by
# stop_recording().
audio_stream = None


def start_recording():
    """Begin a recording session if one is not already active.

    Opens and starts an ``sd.InputStream`` that feeds ``audio_callback``,
    sets ``is_recording`` and launches ``transcribe_audio`` on a daemon
    thread. Calling this while a session is active does nothing.

    The stream is opened before any shared state changes, so a failure to
    open or start the device propagates to the caller without leaving
    ``is_recording`` set or a worker thread running.
    """
    global is_recording, recording_thread, audio_stream
    if is_recording:
        return

    stream = sd.InputStream(
        channels=CHANNELS,
        samplerate=SAMPLE_RATE,
        callback=audio_callback
    )
    try:
        stream.start()
    except Exception:
        # Release the device handle before reporting the failure.
        stream.close()
        raise

    audio_stream = stream
    is_recording = True
    recording_thread = threading.Thread(target=transcribe_audio, daemon=True)
    recording_thread.start()


def stop_recording():
    """End the current recording session and release its resources.

    Clears ``is_recording``, stops and closes the input stream opened by
    ``start_recording``, waits for the transcription thread to finish, and
    discards any audio still sitting on ``audio_queue`` so it is not
    transcribed at the start of the next session. Safe to call when no
    session is active.
    """
    global is_recording, recording_thread, audio_stream
    is_recording = False

    if audio_stream is not None:
        audio_stream.stop()
        audio_stream.close()
        audio_stream = None

    if recording_thread:
        recording_thread.join()
        recording_thread = None

    # Producer and consumer are both stopped now; drop leftover audio.
    while not audio_queue.empty():
        audio_queue.get_nowait()

@app.route('/')
def index():
    """Serve the single-page UI rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page

@socketio.on('start_recording')
def handle_start_recording():
    """Handle the ``start_recording`` Socket.IO event.

    Calls ``start_recording()`` and returns a status dictionary as the
    handler's result.
    """
    start_recording()
    reply = dict(status='success', message='Recording started')
    return reply

@socketio.on('stop_recording')
def handle_stop_recording():
    """Handle the ``stop_recording`` Socket.IO event.

    Calls ``stop_recording()`` and returns a status dictionary as the
    handler's result.
    """
    stop_recording()
    reply = dict(status='success', message='Recording stopped')
    return reply

def main():
    """Run the app through the Socket.IO server with debug mode enabled."""
    socketio.run(app, debug=True)


# Only start the server when this file is executed as a script.
if __name__ == '__main__':
    main()
128 changes: 128 additions & 0 deletions templates/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Real-time Speech Transcription</title>
<script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.0.1/socket.io.js"></script>
<style>
body {
font-family: Arial, sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 20px;
}

.container {
background-color: #f5f5f5;
border-radius: 8px;
padding: 20px;
margin-top: 20px;
}

#transcription-box {
min-height: 200px;
max-height: 400px;
overflow-y: auto;
background-color: white;
border: 1px solid #ddd;
border-radius: 4px;
padding: 15px;
margin: 20px 0;
}

.controls {
display: flex;
gap: 10px;
margin-bottom: 20px;
}

button {
padding: 10px 20px;
border: none;
border-radius: 4px;
cursor: pointer;
font-size: 16px;
transition: background-color 0.3s;
}

#startBtn {
background-color: #4CAF50;
color: white;
}

#stopBtn {
background-color: #f44336;
color: white;
}

button:hover {
opacity: 0.9;
}

button:disabled {
background-color: #cccccc;
cursor: not-allowed;
}

.status {
margin-top: 10px;
font-style: italic;
color: #666;
}
</style>
</head>
<body>
<div class="container">
<h1>Real-time Speech Transcription</h1>

<div class="controls">
<button id="startBtn">Start Recording</button>
<button id="stopBtn" disabled>Stop Recording</button>
</div>

<div class="status" id="status">Status: Ready</div>

<div id="transcription-box"></div>
</div>

<script>
// Socket.IO connection plus the page elements this script drives.
const socket = io();
const startBtn = document.getElementById('startBtn');
const stopBtn = document.getElementById('stopBtn');
const status = document.getElementById('status');
const transcriptionBox = document.getElementById('transcription-box');

// Enable exactly one of the two buttons: Stop while recording, Start otherwise.
function setControls(recording) {
    startBtn.disabled = recording;
    stopBtn.disabled = !recording;
}

// Replace the text shown in the status line.
function setStatus(message) {
    status.textContent = message;
}

// Ask the server to start recording and switch the UI to the recording state.
function onStartClick() {
    socket.emit('start_recording');
    setControls(true);
    setStatus('Status: Recording...');
}

// Ask the server to stop recording and switch the UI back.
function onStopClick() {
    socket.emit('stop_recording');
    setControls(false);
    setStatus('Status: Stopped');
}

// Append one transcription as a new paragraph and keep the box scrolled to the
// bottom. textContent is used, so the text is inserted as plain text.
function onTranscription(data) {
    const paragraph = document.createElement('p');
    paragraph.textContent = data.text;
    transcriptionBox.appendChild(paragraph);
    transcriptionBox.scrollTop = transcriptionBox.scrollHeight;
}

startBtn.addEventListener('click', onStartClick);
stopBtn.addEventListener('click', onStopClick);

socket.on('transcription', onTranscription);

socket.on('connect', () => {
    setStatus('Status: Connected');
});

// On disconnect, show it and return the buttons to the not-recording state.
socket.on('disconnect', () => {
    setStatus('Status: Disconnected');
    setControls(false);
});
</script>
</body>
</html>

0 comments on commit f94801e

Please sign in to comment.