-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathspeech2text.go
91 lines (75 loc) · 2.11 KB
/
speech2text.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
package main
import (
"bufio"
"context"
"fmt"
"os"
"os/user"
"path/filepath"
"time"
"github.com/devinyf/dashscopego"
"github.com/devinyf/dashscopego/paraformer"
)
func main() {
model := paraformer.ParaformerRealTimeV1
token := os.Getenv("DASHSCOPE_API_KEY")
if token == "" {
panic("token is empty")
}
cli := dashscopego.NewTongyiClient(model, token)
streamCallbackFn := func(_ context.Context, chunk []byte) error {
fmt.Println("realtime output:", string(chunk)) //nolint:all
return nil
}
headerPara := paraformer.ReqHeader{
Streaming: "duplex",
TaskID: paraformer.GenerateTaskID(),
Action: "run-task",
}
payload := paraformer.PayloadIn{
Parameters: paraformer.Parameters{
// seems like only support 16000 sample-rate.
SampleRate: 16000,
Format: "pcm",
DisfluencyRemovalEnabled: true,
LanguageHints: []string{"zh", "en"},
},
Input: map[string]interface{}{},
Task: "asr",
TaskGroup: "audio",
Function: "recognition",
}
req := ¶former.Request{
Header: headerPara,
Payload: payload,
StreamingFn: streamCallbackFn,
}
// 声音获取 实际使用时请替换成实时音频流.
voiceReader := readAudioFromDesktop()
reader := bufio.NewReader(voiceReader)
ctx := context.Background()
if err := cli.CreateSpeechToTextGeneration(ctx, req, reader); err != nil {
panic(err)
}
// 等待语音识别结果输出
time.Sleep(5 * time.Second)
// 关闭语音识别连接
if err := cli.CloseSpeechToTextGeneration(); err != nil {
panic(err)
}
}
// readAudioFromDesktop opens "hello_world_female2.wav" in the "Desktop"
// directory under the current user's home directory and returns a buffered
// reader over it. The recording stands in for a real-time speech stream.
//
// The file is expected to be a sample audio file downloaded from the official
// documentation, e.g.
// `https://dashscope.oss-cn-beijing.aliyuncs.com/samples/audio/paraformer/hello_world_male2.wav`.
// NOTE(review): that URL names hello_world_male2.wav while the code opens
// hello_world_female2.wav — save the download under the name used below, or
// confirm which sample is intended.
//
// It panics if the current user cannot be looked up or the file cannot be
// opened. The opened file is never closed by this function or handed back to
// the caller, so the handle stays open for the life of the process.
func readAudioFromDesktop() *bufio.Reader {
	usr, err := user.Current()
	if err != nil {
		panic(err)
	}

	voiceFilePath := filepath.Join(usr.HomeDir, "Desktop", "hello_world_female2.wav")

	// os.Open is the read-only form of OpenFile; a permission argument is only
	// meaningful when a file may be created.
	voice, err := os.Open(voiceFilePath)
	if err != nil {
		panic(err)
	}

	return bufio.NewReader(voice)
}