const express = require('express');
const cors = require('cors');
const multer = require('multer');
const FormData = require('form-data');
const { Readable } = require('stream');
const axios = require('axios');
const ffmpeg = require('fluent-ffmpeg');
const ffmpegPath = require('ffmpeg-static');
const ffmetadata = require('ffmetadata');
const fs = require('fs');
const app = express();
app.use(cors());
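
// Wrap an in-memory upload buffer in a Readable stream so it can be piped to disk.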
const bufferToStream = (buffer) => {
  return Readable.from(buffer);
};

/**
 * Convert a time string of the format 'mm:ss' into seconds.
 * @param {string} timeString - A time string in the format 'mm:ss'.
 * @return {number} - The time in seconds.
 */
const parseTimeStringToSeconds = (timeString) => {
  const [minutes, seconds] = timeString.split(':').map((tm) => parseInt(tm, 10));
  return minutes * 60 + seconds;
};
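
// Store uploads in memory; point fluent-ffmpeg at the bundled static ffmpeg binary.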
const upload = multer();
ffmpeg.setFfmpegPath(ffmpegPath);
app.use(express.json());
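
// Simple welcome / health-check route.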
app.get('/', (req, res) => {
  res.send('Welcome to the Whisper Speech-to-Text API!');
});
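
// Trim the uploaded audio to the [startTime, endTime] window ('mm:ss') and
// transcribe it with OpenAI Whisper. Example request (file name hypothetical):
//   curl -X POST http://localhost:3001/api/transcribe \
//     -F file=@clip.mp3 -F startTime=00:30 -F endTime=01:15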
app.post('/api/transcribe', upload.single('file'), async (req, res) => {
  const audioFile = req.file;
  const startTime = req.body.startTime;
  const endTime = req.body.endTime;
  if (!audioFile) {
    res.status(400).json({ message: 'Audio file is required.' });
    return;
  }
  if (!startTime || !endTime) {
    res.status(400).json({ message: 'Start and end times are required.' });
    return;
  }

  // Parse the 'mm:ss' strings and compute the clip duration in seconds.
  const startSeconds = parseTimeStringToSeconds(startTime);
  const endSeconds = parseTimeStringToSeconds(endTime);
  const timeDuration = endSeconds - startSeconds;
  if (Number.isNaN(timeDuration) || timeDuration <= 0) {
    res.status(400).json({ message: 'End time must be after start time.' });
    return;
  }
  try {
    const audioStream = bufferToStream(audioFile.buffer);
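
    // Write the upload to a temp file, trim it to the requested window with
    // ffmpeg, and resolve with the trimmed audio as a buffer.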
    const trimAudio = async (audioStream, endSeconds) => {
      const tempFileName = `temp-${Date.now()}.mp3`;
      const outputFileName = `output-${Date.now()}.mp3`;
      return new Promise((resolve, reject) => {
        audioStream.pipe(fs.createWriteStream(tempFileName))
          .on('finish', () => {
            ffmetadata.read(tempFileName, (err, metadata) => {
              if (err) return reject(err);
              // Clamp the end time so it never exceeds the real track length.
              const duration = parseFloat(metadata.duration);
              const clampedEnd = Number.isFinite(duration) ? Math.min(endSeconds, duration) : endSeconds;
              ffmpeg(tempFileName)
                .setStartTime(startSeconds)
                .setDuration(clampedEnd - startSeconds)
                .output(outputFileName)
                .on('end', () => {
                  // Delete the temp input, read the trimmed result, then delete that too.
                  fs.unlink(tempFileName, (err) => {
                    if (err) console.error('Error deleting temp file:', err);
                  });
                  const trimmedAudioBuffer = fs.readFileSync(outputFileName);
                  fs.unlink(outputFileName, (err) => {
                    if (err) console.error('Error deleting output file:', err);
                  });
                  resolve(trimmedAudioBuffer);
                })
                .on('error', reject)
                .run();
            });
          })
          .on('error', reject);
      });
    };

    const trimmedAudioBuffer = await trimAudio(audioStream, endSeconds);
    // Call the OpenAI Whisper API to transcribe the trimmed audio clip.
    const formData = new FormData();
    formData.append('file', trimmedAudioBuffer, { filename: 'audio.mp3', contentType: 'audio/mpeg' });
    formData.append('model', 'whisper-1');
    formData.append('response_format', 'json');
    const config = {
      headers: {
        // getHeaders() supplies the multipart Content-Type with the correct boundary.
        ...formData.getHeaders(),
        Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
      },
    };
    const response = await axios.post('https://api.openai.com/v1/audio/transcriptions', formData, config);
    const transcription = response.data.text;
    res.json({ transcription });
  } catch (error) {
    console.error('Error transcribing audio:', error);
    res.status(500).json({ error: 'Error transcribing audio' });
  }
});
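
// Start the HTTP server.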
const PORT = process.env.PORT || 3001;
app.listen(PORT, () => {
  console.log(`Server is running on port ${PORT}`);
});