-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit f94801e
Showing
4 changed files
with
386 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
share/python-wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
MANIFEST | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.nox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
*.py,cover | ||
.hypothesis/ | ||
.pytest_cache/ | ||
cover/ | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
local_settings.py | ||
db.sqlite3 | ||
db.sqlite3-journal | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
.pybuilder/ | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# IPython | ||
profile_default/ | ||
ipython_config.py | ||
|
||
# pyenv | ||
# For a library or package, you might want to ignore these files since the code is | ||
# intended to run in multiple environments; otherwise, check them in: | ||
# .python-version | ||
|
||
# pipenv | ||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. | ||
# However, in case of collaboration, if having platform-specific dependencies or dependencies | ||
# having no cross-platform support, pipenv may install dependencies that don't work, or not | ||
# install all needed dependencies. | ||
#Pipfile.lock | ||
|
||
# poetry | ||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. | ||
# This is especially recommended for binary packages to ensure reproducibility, and is more | ||
# commonly ignored for libraries. | ||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control | ||
#poetry.lock | ||
|
||
# pdm | ||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. | ||
#pdm.lock | ||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it | ||
# in version control. | ||
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control | ||
.pdm.toml | ||
.pdm-python | ||
.pdm-build/ | ||
|
||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm | ||
__pypackages__/ | ||
|
||
# Celery stuff | ||
celerybeat-schedule | ||
celerybeat.pid | ||
|
||
# SageMath parsed files | ||
*.sage.py | ||
|
||
# Environments | ||
.env | ||
.venv | ||
env/ | ||
venv/ | ||
ENV/ | ||
env.bak/ | ||
venv.bak/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
.spyproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# mkdocs documentation | ||
/site | ||
|
||
# mypy | ||
.mypy_cache/ | ||
.dmypy.json | ||
dmypy.json | ||
|
||
# Pyre type checker | ||
.pyre/ | ||
|
||
# pytype static type analyzer | ||
.pytype/ | ||
|
||
# Cython debug symbols | ||
cython_debug/ | ||
|
||
# PyCharm | ||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can | ||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore | ||
# and can be added to the global gitignore or merged into this file. For a more nuclear | ||
# option (not recommended) you can uncomment the following to ignore the entire idea folder. | ||
#.idea/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Real-Time Speech-to-Text (Using OpenAI Whisper) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
# app.py | ||
from flask import Flask, render_template | ||
from flask_socketio import SocketIO | ||
import sounddevice as sd | ||
import numpy as np | ||
import threading | ||
import queue | ||
import whisper | ||
import time | ||
|
||
# Flask application plus the Socket.IO server wrapped around it.
app = Flask(__name__)
socketio = SocketIO(app)

# Whisper model ("small" checkpoint), loaded once when the module is imported.
model = whisper.load_model("small")

# Audio capture settings.
SAMPLE_RATE = 16000  # handed to the input stream as its samplerate
CHANNELS = 1         # handed to the input stream as its channel count
CHUNK_DURATION = 1   # presumably seconds, given how CHUNK_SIZE is derived
CHUNK_SIZE = int(SAMPLE_RATE * CHUNK_DURATION)
# Mean absolute amplitude a buffer must exceed before it is transcribed.
ENERGY_THRESHOLD = 0.002

# State shared between the audio callback, the worker thread and the handlers.
audio_queue = queue.Queue()  # buffers of captured samples awaiting transcription
is_recording = False         # True while a recording session is active
recording_thread = None      # worker thread running transcribe_audio, if any
|
||
def audio_callback(indata, frames, time, status):
    """Queue one buffer of captured audio while a recording session is active.

    Passed as the ``callback`` of the ``sd.InputStream`` opened by
    ``start_recording``.

    Args:
        indata: Captured samples; copied, flattened to 1-D and cast to
            float32 before being queued.
        frames: Unused here.
        time: Unused here (shadows the ``time`` module inside this function).
        status: Printed when truthy.
    """
    if status:
        print(f"Status: {status}")

    # Outside a recording session the incoming audio is simply ignored.
    if not is_recording:
        return

    samples = indata.copy().flatten().astype(np.float32)
    audio_queue.put(samples)
|
||
def transcribe_audio():
    """Transcribe queued audio with Whisper until recording stops.

    Runs as the target of the worker thread created in ``start_recording``
    and loops for as long as the module-level ``is_recording`` flag is true.
    Each pass collects every buffer currently in ``audio_queue``; once at
    least ``min_gap`` seconds have passed since the previous transcription
    started, the collected audio is concatenated and, if its mean absolute
    amplitude exceeds ``ENERGY_THRESHOLD``, transcribed. Non-empty text is
    emitted to connected clients as a ``'transcription'`` Socket.IO event.
    """
    min_gap = 0.75       # minimum seconds between the starts of two transcriptions
    poll_timeout = 0.1   # seconds to block on the queue before re-checking the flag
    last_transcription_time = 0
    pending = []         # buffers dequeued but not yet transcribed or discarded

    while is_recording:
        # Block on the queue instead of spinning on empty(): an idle worker
        # then sleeps rather than burning a CPU core, while the timeout keeps
        # the loop responsive to is_recording being cleared.
        try:
            pending.append(audio_queue.get(timeout=poll_timeout))
        except queue.Empty:
            continue

        # Pick up everything else that is already waiting.
        while True:
            try:
                pending.append(audio_queue.get_nowait())
            except queue.Empty:
                break

        # Too soon after the previous transcription: keep the audio buffered
        # for the next pass instead of throwing it away.
        if (time.time() - last_transcription_time) <= min_gap:
            continue

        audio_chunk = np.concatenate(pending, axis=0).astype(np.float32)
        pending = []

        # Buffers at or below the energy threshold are dropped untranscribed.
        if np.mean(np.abs(audio_chunk)) <= ENERGY_THRESHOLD:
            continue

        last_transcription_time = time.time()
        result = model.transcribe(audio_chunk, language='en', without_timestamps=True)
        transcribed_text = result['text'].strip()

        if transcribed_text:
            # Emit the transcription to connected clients
            socketio.emit('transcription', {'text': transcribed_text})
|
||
# The input stream opened by start_recording(); None while nothing is open.
# Kept at module level so stop_recording() can stop and close it.
_audio_stream = None


def start_recording():
    """Open the microphone stream and start the transcription worker.

    Does nothing if a recording session is already active. Audio left in
    ``audio_queue`` by a previous session is discarded first so it is not
    transcribed as part of the new one.

    Raises:
        Whatever ``sd.InputStream`` or its ``start()`` raises when the input
        device cannot be opened. In that case no state is changed: the
        recording flag stays false and no worker thread is started.
    """
    global is_recording, recording_thread, _audio_stream
    if is_recording:
        return

    # Discard leftovers from the previous session.
    while True:
        try:
            audio_queue.get_nowait()
        except queue.Empty:
            break

    # Open the device before touching any shared state, so a failure here
    # cannot leave is_recording set with a worker thread that never gets audio.
    stream = sd.InputStream(
        channels=CHANNELS,
        samplerate=SAMPLE_RATE,
        callback=audio_callback,
    )
    try:
        stream.start()
    except Exception:
        stream.close()
        raise

    _audio_stream = stream
    is_recording = True
    recording_thread = threading.Thread(target=transcribe_audio, daemon=True)
    recording_thread.start()


def stop_recording():
    """Stop the microphone stream and wait for the transcription worker.

    Safe to call when nothing is being recorded. Clears the recording flag,
    stops and closes the stream opened by ``start_recording`` (so a later
    start does not end up with two streams feeding the same queue), then
    joins the worker thread.
    """
    global is_recording, recording_thread, _audio_stream
    is_recording = False

    if _audio_stream is not None:
        _audio_stream.stop()
        _audio_stream.close()
        _audio_stream = None

    if recording_thread:
        recording_thread.join()
        recording_thread = None
|
||
@app.route('/')
def index():
    """Serve the page rendered from the 'index.html' template at the site root."""
    page = render_template('index.html')
    return page
|
||
@socketio.on('start_recording')
def handle_start_recording():
    """Handle the 'start_recording' Socket.IO event.

    Calls ``start_recording()`` and returns a status dictionary.
    """
    start_recording()
    reply = {'status': 'success', 'message': 'Recording started'}
    return reply
|
||
@socketio.on('stop_recording')
def handle_stop_recording():
    """Handle the 'stop_recording' Socket.IO event.

    Calls ``stop_recording()`` and returns a status dictionary.
    """
    stop_recording()
    reply = {'status': 'success', 'message': 'Recording stopped'}
    return reply
|
||
if __name__ == '__main__':
    # Start the Socket.IO server only when this file is executed as a script.
    run_options = {'debug': True}
    socketio.run(app, **run_options)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
<!DOCTYPE html> | ||
<html lang="en"> | ||
<head> | ||
<meta charset="UTF-8"> | ||
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | ||
<title>Real-time Speech Transcription</title> | ||
<script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.0.1/socket.io.js"></script> | ||
<style> | ||
body { | ||
font-family: Arial, sans-serif; | ||
max-width: 800px; | ||
margin: 0 auto; | ||
padding: 20px; | ||
} | ||
|
||
.container { | ||
background-color: #f5f5f5; | ||
border-radius: 8px; | ||
padding: 20px; | ||
margin-top: 20px; | ||
} | ||
|
||
#transcription-box { | ||
min-height: 200px; | ||
max-height: 400px; | ||
overflow-y: auto; | ||
background-color: white; | ||
border: 1px solid #ddd; | ||
border-radius: 4px; | ||
padding: 15px; | ||
margin: 20px 0; | ||
} | ||
|
||
.controls { | ||
display: flex; | ||
gap: 10px; | ||
margin-bottom: 20px; | ||
} | ||
|
||
button { | ||
padding: 10px 20px; | ||
border: none; | ||
border-radius: 4px; | ||
cursor: pointer; | ||
font-size: 16px; | ||
transition: background-color 0.3s; | ||
} | ||
|
||
#startBtn { | ||
background-color: #4CAF50; | ||
color: white; | ||
} | ||
|
||
#stopBtn { | ||
background-color: #f44336; | ||
color: white; | ||
} | ||
|
||
button:hover { | ||
opacity: 0.9; | ||
} | ||
|
||
button:disabled { | ||
background-color: #cccccc; | ||
cursor: not-allowed; | ||
} | ||
|
||
.status { | ||
margin-top: 10px; | ||
font-style: italic; | ||
color: #666; | ||
} | ||
</style> | ||
</head> | ||
<body> | ||
<div class="container"> | ||
<h1>Real-time Speech Transcription</h1> | ||
|
||
<div class="controls"> | ||
<button id="startBtn">Start Recording</button> | ||
<button id="stopBtn" disabled>Stop Recording</button> | ||
</div> | ||
|
||
<div class="status" id="status">Status: Ready</div> | ||
|
||
<div id="transcription-box"></div> | ||
</div> | ||
|
||
<script> | ||
// Client side of the transcription page: forwards the button clicks to the
// server over Socket.IO and appends each transcription it receives.
const byId = (id) => document.getElementById(id);

const socket = io();
const startBtn = byId('startBtn');
const stopBtn = byId('stopBtn');
const status = byId('status');
const transcriptionBox = byId('transcription-box');

// Show a message in the status line.
function showStatus(message) {
    status.textContent = message;
}

// Enable exactly one of the two buttons, depending on whether we are recording.
function setButtons(recording) {
    startBtn.disabled = recording;
    stopBtn.disabled = !recording;
}

// Append one transcription as its own paragraph and keep the box scrolled
// to the newest entry.
function appendTranscription(text) {
    const paragraph = document.createElement('p');
    paragraph.textContent = text;
    transcriptionBox.appendChild(paragraph);
    transcriptionBox.scrollTop = transcriptionBox.scrollHeight;
}

startBtn.addEventListener('click', () => {
    socket.emit('start_recording');
    setButtons(true);
    showStatus('Status: Recording...');
});

stopBtn.addEventListener('click', () => {
    socket.emit('stop_recording');
    setButtons(false);
    showStatus('Status: Stopped');
});

socket.on('transcription', (data) => appendTranscription(data.text));

socket.on('connect', () => showStatus('Status: Connected'));

socket.on('disconnect', () => {
    showStatus('Status: Disconnected');
    setButtons(false);
});
</script> | ||
</body> | ||
</html> |