-
Notifications
You must be signed in to change notification settings - Fork 82
/
spk2info.txt
469 lines (469 loc) · 36.4 KB
/
spk2info.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
{'中文女': {'embedding': tensor([[-9.0311e-01, 2.4428e+00, 5.3202e-02, 1.1784e+00, -7.7890e-01,
7.3300e-01, 1.0855e+00, 3.9727e-01, 1.2000e+00, -1.1464e+00,
1.4309e+00, -2.4534e-01, 1.9687e+00, -2.0198e+00, -4.6386e-01,
-2.1271e+00, -6.9417e-01, -6.7190e-01, 9.2530e-02, -6.6294e-01,
-1.4480e+00, 9.9946e-01, 3.0862e-01, -1.0641e+00, 1.1420e+00,
2.9963e-01, -3.8379e-02, 7.1181e-01, 3.2029e-01, -1.3339e+00,
-1.1267e+00, -1.4448e+00, 5.2440e-04, -1.1894e+00, -8.4154e-01,
-1.1474e+00, 5.4266e-01, 1.3111e+00, 1.4149e+00, -6.0935e-01,
-4.0102e-01, -1.1532e+00, 1.7652e+00, -5.1973e-01, 1.5713e+00,
8.2115e-02, 5.2128e-01, 5.9423e-02, 2.0231e+00, -7.6898e-01,
2.1437e-01, -1.7442e+00, 7.3698e-01, 5.0766e-01, 2.2924e-01,
8.5808e-01, 4.6188e-01, -1.5217e+00, 1.9724e+00, -9.1853e-01,
-4.7492e-01, -5.7958e-01, 4.0531e-02, -4.4168e-01, -1.0522e+00,
-1.5480e+00, -1.4951e-01, 7.7455e-01, -1.6904e-01, 6.1674e-02,
1.6403e+00, -8.2473e-01, -5.8671e-01, -1.3640e+00, 7.3164e-01,
8.3433e-01, 9.1687e-01, -9.2262e-02, 8.5859e-01, -1.9806e-01,
5.0999e-01, -7.1520e-01, 4.3584e-01, 6.2446e-01, 5.8434e-01,
-1.0077e+00, -2.9693e-01, -9.1880e-01, -1.0739e+00, -9.8572e-01,
2.3599e+00, 8.8236e-01, 7.4872e-01, 6.8751e-01, 3.8719e-01,
-1.6051e+00, -6.4754e-01, -3.2314e+00, 1.2784e+00, 1.1242e+00,
8.4952e-01, -4.4329e-01, 7.5583e-01, 1.4274e+00, -1.1416e+00,
2.0034e-01, 7.7534e-01, -2.3794e-01, 3.7495e-01, -2.7319e-01,
5.2984e-01, 2.1585e-01, -1.2391e+00, -1.0856e+00, -5.8321e-01,
8.3495e-01, -1.0879e+00, 3.4334e-03, -1.5998e+00, 1.0349e+00,
1.4543e+00, -2.2842e+00, -3.6158e-01, -2.1038e-01, -2.2228e+00,
-4.9595e-01, 7.1644e-01, -1.3224e+00, 1.3268e+00, 5.4674e-01,
-8.0411e-01, -6.9220e-02, -4.5076e-02, 1.6976e+00, -7.4012e-02,
1.9025e-01, -5.3636e-01, -1.1344e-01, 2.2000e-01, -5.9153e-01,
-8.8950e-02, -1.7833e+00, -5.1166e-01, -1.4820e+00, -1.1016e-02,
-1.6954e+00, -9.7188e-01, -3.6324e-01, -1.7158e-01, 8.2002e-01,
-9.1027e-01, 1.0510e+00, 1.0104e+00, 2.3783e+00, 7.3361e-01,
-6.3397e-01, -8.7724e-01, -1.3077e+00, -1.5522e+00, -6.2157e-01,
1.4270e+00, -6.5892e-01, 1.4831e-01, -1.0042e+00, 2.0797e+00,
6.8092e-01, -6.7876e-01, 2.9071e-01, 2.8133e-01, -1.8649e-01,
-3.1936e-01, 6.3171e-02, 2.4718e-02, -6.6571e-01, 1.7916e-01,
1.7553e-01, -5.3749e-01, 1.5378e-01, 4.8241e-01, -7.7934e-01,
5.2003e-01, -4.8131e-01, 1.4069e+00, -9.0451e-01, 1.6606e+00,
1.3522e-02, 3.6204e-01, -5.4462e-01, -2.5318e-01, 1.1155e+00,
5.0736e-01, 2.1480e+00]], device='cuda:0'), 'speech_token': tensor([[ 742, 669, 667, 1651, 730, 658, 658, 658, 13, 512, 3575, 3575,
58, 58, 58, 1460, 1460, 1460, 550, 550, 649, 271, 271, 21,
311, 441, 596, 211, 211, 823, 154, 548, 548, 48, 48, 258,
674, 490, 3254, 322, 193, 193, 193, 356, 418, 201, 439, 439,
135, 673, 204, 678, 75, 75, 75, 75, 237, 216, 216, 216,
440, 440, 237, 237, 216, 440, 440, 237, 216, 216, 216, 216,
216, 440, 440, 440, 230, 473, 631, 345, 53, 598, 483, 483,
483, 892, 462, 2828, 2828, 1006, 59, 414, 721, 212, 3766, 49,
537, 540, 380, 49, 513, 585, 69, 420, 536, 457, 69, 20,
351, 465, 515, 515, 3965, 3965, 478, 513, 3950, 173, 175, 175,
173, 1206, 537, 537, 537, 537, 537, 537, 537, 740, 344, 424,
394, 394, 394, 481, 481, 609, 669, 669, 667, 658, 658, 658,
658, 730, 658, 658, 658, 658, 658, 658, 692, 658, 658, 658,
658, 658, 658, 658, 658, 658, 658, 658, 658, 658, 658, 658,
658, 658, 658, 658, 192, 373, 584, 12, 313, 621, 731, 103,
143, 465, 212, 706, 675, 58, 58, 58, 58, 58, 531, 458,
458, 1006, 550, 51, 64, 732, 1930, 281, 1988, 309, 502, 1935,
4, 681, 468, 2723, 137, 707, 3254, 322, 356, 356, 356, 356,
356, 3966, 1529, 683, 1531, 127, 313, 690, 411, 52, 143, 465,
227, 49, 49, 466, 306, 42, 369, 1089, 1089, 1089, 1089, 189,
725, 227, 227, 704, 371, 224, 274, 274, 48, 64, 64, 258,
96, 123, 559, 419, 676, 275, 612, 64, 64, 1381, 1479, 289,
608, 750, 317, 441, 260, 250, 608, 159, 374, 1, 2344, 39,
1519, 629, 309, 317, 495, 196, 2031, 332, 332, 2454, 1037, 197,
351, 515, 515, 383, 427, 11, 3539, 620, 620, 312, 446, 3108,
437, 437, 523, 722, 199, 138, 332, 7, 70, 70, 678, 144,
505, 75, 75, 75, 75, 71, 237, 440, 440, 440, 216, 216,
230, 230, 230, 230, 230, 230, 230, 106, 631, 249, 436, 568,
312, 535, 633, 58, 672, 672, 59, 326, 513, 232, 596, 153,
391, 151, 3950, 439, 559, 123, 600, 439, 107, 2782, 3290, 177,
177, 177, 2752, 2782, 2782, 645, 219, 516, 516, 516, 547, 276,
276, 1938, 1, 302, 302, 302, 302, 238, 598, 1357, 1357, 619,
3370, 116, 10, 10, 10, 10, 10, 10, 3836, 317, 2299, 64,
48, 623, 87, 298, 298, 427, 274, 274, 2691, 58, 672, 333,
312, 2828, 82, 432, 310, 623, 462, 4085, 20, 270, 235, 363,
391, 173, 362, 52, 143, 465, 465, 465, 49, 110, 79, 234,
211, 502, 4, 4, 228, 386, 386, 386, 648, 88, 33, 348,
3605, 731, 371, 337, 10, 10, 10, 576, 563, 1405, 655, 407,
655, 271, 415, 415, 21, 51, 51, 2185, 274, 263, 394, 394,
206, 483, 503, 2280, 382, 600, 3966, 257, 59, 84, 437, 437,
437, 215, 55, 626, 286, 212, 351, 606, 3823, 333, 333, 312,
1006, 1006, 550, 314, 655, 271, 496, 744, 685, 331, 331, 3190,
658, 658, 717]], device='cuda:0'), 'speech_feat': tensor([[[ -7.3653, -7.5106, -7.8205, ..., -11.3804, -11.5129, -11.5129],
[ -7.2448, -7.4869, -7.9518, ..., -11.4072, -11.5129, -11.5129],
[ -7.1422, -7.2470, -8.6791, ..., -11.5129, -11.5129, -11.5129],
...,
[ -7.6684, -7.1913, -7.5972, ..., -11.2963, -11.5129, -11.5129],
[ -6.9868, -6.9480, -8.0950, ..., -11.2144, -11.5129, -11.5129],
[ -7.3785, -7.5166, -8.3014, ..., -11.3842, -11.5045, -11.5129]]],
device='cuda:0')}, '中文男': {'embedding': tensor([[-4.7675e-01, 4.8706e-01, 6.0701e-01, -7.8119e-01, 3.1131e-01,
2.1380e-01, 1.0490e+00, -1.2204e-02, 3.0464e-01, 1.1475e+00,
-7.8873e-01, 8.6599e-01, -3.5816e-01, 6.6639e-01, 4.2000e-01,
-2.6694e+00, -7.1964e-01, 8.9845e-01, 1.1887e-01, 1.5026e+00,
5.0782e-01, 3.5375e-01, -3.9658e-01, -4.4128e-02, 6.8545e-02,
-1.7699e+00, -3.3017e-01, 4.8699e-01, -4.0177e-01, -1.4162e+00,
-1.0552e+00, 5.4526e-01, 6.4377e-02, -4.3332e-01, 2.8372e-01,
1.9373e-01, 1.8469e+00, -4.8248e-01, 2.5491e-01, -1.5284e+00,
3.7985e-01, -1.2131e+00, 5.7382e-01, 1.9263e+00, 1.0277e+00,
-2.0410e+00, 1.6994e+00, 4.0125e-01, -1.3802e+00, -1.3170e+00,
5.7778e-01, 3.7044e-01, 2.3611e+00, 2.2229e-01, 1.8479e-02,
8.7804e-01, 4.4884e-01, 9.3549e-01, -6.5089e-01, -1.0038e+00,
-2.1671e+00, -2.5344e-01, 1.0269e+00, -1.2754e-01, -1.4712e+00,
6.0334e-02, 7.2617e-01, 9.6811e-01, -5.1777e-01, -2.8487e-01,
1.4608e+00, -5.7581e-01, -1.5666e+00, 7.0522e-01, -8.2405e-01,
8.2156e-01, 7.4584e-01, -1.8134e+00, 1.2690e+00, 8.8461e-01,
-8.7108e-01, 5.2647e-01, 6.3282e-01, -6.5093e-01, -6.7059e-01,
-8.8662e-01, 1.5164e+00, 9.0019e-02, -2.0860e+00, 6.9681e-01,
5.3205e-01, 1.1582e+00, 5.1838e-02, 2.7013e-01, -1.3089e+00,
-7.4506e-01, -2.1965e+00, -8.5331e-01, 7.0330e-01, 5.1489e-01,
1.2879e-01, 1.0945e+00, -6.0475e-01, 1.7567e+00, -1.0414e+00,
6.5718e-01, 3.1544e-01, 1.4416e-01, 9.4417e-01, -1.9898e-02,
-2.4229e-01, 1.8041e+00, -1.5624e+00, -4.0960e-01, -4.6308e-01,
-1.8926e-01, -2.0597e+00, -9.2740e-01, -1.6079e+00, 6.4653e-01,
9.7411e-01, -9.7731e-01, 1.0752e+00, -1.0201e+00, -8.9851e-02,
-1.4135e+00, -8.5500e-01, 1.0708e+00, 6.5052e-01, -1.9524e+00,
-4.1272e-01, 5.7558e-01, -1.3777e+00, 7.3943e-01, -2.7827e-02,
1.0093e-01, 4.3256e-01, -7.2485e-01, 3.9214e-01, -2.5242e+00,
-3.4206e-01, 5.2858e-01, -1.1947e+00, 1.1940e+00, -5.7256e-01,
9.1469e-01, -3.4823e-01, 1.1689e+00, 5.6559e-01, -7.8982e-01,
8.2389e-01, 1.9192e-01, 8.5633e-01, 7.0533e-01, -3.4850e-01,
1.2857e+00, 4.5024e-01, -6.3573e-01, -7.4346e-01, -1.1228e+00,
6.4149e-01, -1.0785e-02, -8.8332e-01, -3.0175e+00, 2.0277e+00,
3.9993e-01, 9.0774e-01, -5.5440e-01, -7.0448e-01, -1.1110e+00,
-6.0681e-02, 4.4644e-01, 3.2798e-01, 2.6591e-01, 5.6404e-01,
-3.9028e-01, 2.8194e-02, -1.3617e+00, 5.2578e-01, -1.5641e+00,
-1.1308e+00, 1.0324e+00, 2.0887e+00, -9.8852e-01, -2.5042e-03,
1.8714e+00, -9.9099e-01, -1.3469e+00, -1.0467e+00, -1.3493e+00,
9.3264e-01, 5.6087e-01]], device='cuda:0'), 'speech_token': tensor([[ 285, 369, 2130, 93, 93, 40, 40, 40, 659, 702, 678, 345,
230, 70, 723, 398, 642, 377, 67, 653, 217, 3480, 114, 24,
32, 307, 722, 87, 187, 618, 688, 3950, 624, 294, 413, 517,
161, 307, 235, 235, 1852, 12, 290, 308, 259, 89, 323, 56,
94, 289, 2437, 223, 126, 3575, 58, 512, 352, 107, 22, 232,
232, 596, 361, 609, 289, 92, 51, 51, 51, 195, 195, 195,
394, 394, 426, 394, 85, 673, 673, 580, 75, 75, 75, 75,
3238, 3238, 3238, 3238, 3238, 3238, 3238, 3238, 3238, 3238, 3238, 75,
75, 75, 53, 53, 222, 222, 588, 334, 40, 40, 459, 470,
38, 1923, 585, 557, 175, 618, 183, 391, 101, 289, 314, 390,
310, 376, 376, 722, 342, 189, 226, 1354, 183, 506, 371, 316,
706, 433, 352, 1593, 338, 516, 469, 310, 32, 32, 1354, 1354,
422, 79, 79, 378, 74, 277, 3254, 274, 274, 655, 1381, 64,
92, 378, 175, 1942, 153, 183, 183, 183, 515, 225, 210, 95,
445, 56, 399, 479, 297, 2644, 82, 2849, 134, 2644, 426, 601,
15, 562, 215, 215, 215, 489, 401, 713, 15, 2644, 568, 58,
553, 553, 58, 58, 58, 77, 145, 889, 2185, 51, 51, 163,
163, 372, 372, 163, 643, 349, 579, 579, 390, 390, 164, 517,
139, 517, 236, 1059, 128, 331, 331, 331, 331, 331, 331, 331,
331, 331, 331, 331, 331, 66, 331, 66, 66, 66, 66, 331,
331, 331, 331, 331, 66, 685, 685, 710, 293, 172, 160, 157,
468, 160, 1700, 27, 27, 348, 27, 33, 681, 33, 33, 33,
33, 33, 27, 27, 27, 293, 350, 2992, 315, 315, 386, 386,
386, 160, 1531, 56, 3158, 175, 492, 596, 211, 363, 183, 363,
363, 234, 135, 306, 3950, 307, 742, 283, 348, 27, 27, 353,
353, 353, 353, 334, 710, 710, 459, 230, 230, 230, 442, 140,
442, 230, 3238, 3238, 75, 3238, 1357, 75, 3238, 222, 3238, 279,
222, 531, 1191, 360, 331, 331, 470, 44, 252, 132, 629, 247,
33, 33, 27, 269, 396, 422, 323, 277, 1381, 51, 2185, 223,
15, 99, 22, 2297, 310, 310, 3132, 366, 274, 274, 274, 58,
58, 58, 352, 3276, 3276, 180, 415, 390, 414, 250, 570, 15,
15, 3290, 216, 57, 374, 185, 180, 1037, 542, 1037, 1059, 40,
40, 40, 331, 663, 615, 331, 331, 66, 331, 331, 66, 66,
66, 66, 66, 66, 66, 331, 331, 331, 331, 485, 53, 701,
124, 1089, 584, 363, 183, 456, 594, 103, 362, 362, 354, 354,
354, 576, 236, 1037, 180, 542, 542, 701, 8, 8, 1089, 1091,
1091, 132, 213, 1460, 458, 3823, 3823, 401, 351, 342, 584, 183,
515, 492, 173, 133, 74, 74, 1381, 1381, 377, 1065, 124, 149,
274, 389, 1381, 64, 2185, 745, 90, 1531, 502, 4, 597, 205,
7, 713, 364, 364, 352, 352, 601, 673, 215, 632, 232, 232,
1401, 542, 459, 128, 128, 459, 459, 459, 1089, 1089, 425, 412,
694, 12, 374, 245, 354, 34, 306, 1037, 123, 1935, 452, 742,
289, 354, 1405, 354, 445, 370, 353, 353, 630, 90, 184, 297,
2644, 2849, 2849, 426, 619, 680, 8, 34, 673, 531, 75, 3238,
3238, 3238, 3238, 3238, 3238, 610, 369, 369, 331, 331, 717]],
device='cuda:0'), 'speech_feat': tensor([[[ -9.3215, -9.5973, -10.0039, ..., -11.5129, -11.5129, -11.5129],
[ -7.5648, -8.1816, -8.8086, ..., -11.5129, -11.5129, -11.5129],
[ -7.1480, -7.5778, -7.9967, ..., -11.5129, -11.5129, -11.5129],
...,
[-11.5129, -11.5129, -11.5129, ..., -11.5129, -11.5129, -11.5129],
[-11.5129, -11.5129, -11.5129, ..., -11.5129, -11.5129, -11.5129],
[-11.5129, -11.5129, -11.5129, ..., -11.5129, -11.5129, -11.5129]]],
device='cuda:0')}, '日语男': {'embedding': tensor([[-1.3031e+00, 1.8902e+00, -2.6645e-01, -8.0333e-01, -8.7831e-01,
2.7975e-01, -4.8120e-01, 4.1698e-01, 6.8325e-01, -3.1044e-01,
-1.0099e+00, -4.8102e-01, 1.3026e+00, 1.4372e+00, 7.7542e-01,
-1.2588e+00, 1.7457e+00, 2.1351e-01, 6.6470e-01, 6.5626e-01,
2.6224e-02, 6.5727e-01, -4.4175e-01, -1.8700e-01, 5.1056e-01,
-2.6516e-01, 1.1470e+00, 9.0935e-01, -7.3813e-01, -6.7816e-01,
-2.6307e+00, -7.8841e-01, -6.7323e-01, -1.1491e+00, 1.7465e-01,
-1.5082e+00, 5.6501e-01, 5.4691e-01, -3.1582e-01, 7.7718e-01,
1.0027e+00, -3.8597e-01, -6.0435e-01, 1.8058e+00, 1.0477e+00,
2.5615e-01, 1.1061e+00, 1.6427e-01, 2.2443e-01, 1.4641e+00,
-7.6686e-01, -8.6277e-01, 2.4495e+00, 1.0992e+00, 1.1165e-01,
4.4591e-01, 1.6204e+00, -1.0559e+00, 8.9757e-01, -5.5822e-01,
-6.7474e-01, 1.7221e-01, 1.8765e-01, -2.1729e-03, -1.4465e+00,
1.6277e+00, 4.8228e-01, -5.0746e-01, 5.4760e-01, 9.3079e-01,
1.1266e+00, 3.2098e-01, -5.7850e-01, 9.2404e-01, -2.4793e+00,
-3.7849e-01, -6.6986e-01, -3.1956e-01, 1.2064e+00, 1.4272e+00,
2.8604e-01, -6.8995e-01, 7.7872e-01, -4.8621e-01, -2.7185e-01,
-6.1518e-01, -5.3003e-01, 3.5535e-01, -1.0850e+00, -6.1244e-01,
7.4615e-02, 1.0174e+00, -1.0699e+00, -8.5835e-01, -1.5384e-01,
5.9325e-01, -7.2253e-01, -1.7782e+00, 1.1371e+00, 1.5473e+00,
5.8995e-01, 1.1755e+00, -2.7910e-01, 5.1421e-01, 4.5708e-01,
-7.7983e-01, -6.0261e-02, 4.3313e-01, -5.0399e-01, 1.1208e-01,
-4.6231e-01, 1.4253e-01, 2.3490e-01, -7.5292e-01, -5.1627e-02,
1.3354e+00, -1.1029e+00, 2.8536e-02, -1.4660e+00, 7.7420e-01,
-1.0889e-01, -7.0215e-01, 2.2416e+00, -1.1519e+00, -1.9500e+00,
-1.7068e-01, -8.2140e-01, 3.8515e-01, 1.1886e+00, 2.6890e-01,
-1.4466e+00, 3.8584e-01, -6.3433e-02, 1.6589e-01, -1.8657e+00,
9.5727e-02, -1.2112e+00, -1.5481e-01, 4.8097e-01, 2.5307e-01,
1.2443e+00, -3.8979e-01, -1.5058e+00, -2.8740e-01, 2.3880e+00,
-1.4192e+00, -9.3661e-02, -2.2720e+00, -7.7954e-01, -4.7817e-01,
-2.2115e+00, 1.1656e-01, -8.3665e-01, -1.4938e-01, -3.1006e-01,
1.5582e-01, -6.4252e-01, 5.4089e-01, -5.6263e-01, -7.7892e-01,
4.7434e-01, 7.6446e-01, 7.8744e-01, -1.0673e+00, -8.6421e-01,
-1.1173e+00, 1.2507e+00, -1.2973e-01, 1.0593e+00, -5.3912e-01,
-3.6194e-01, 4.6449e-02, 4.2183e-02, 9.6895e-01, 4.4996e-01,
-3.1568e-02, 8.0555e-02, 1.0295e+00, 7.6236e-01, -1.3918e+00,
-1.1825e+00, -5.3569e-01, -3.0238e-01, 8.7621e-01, 2.7224e-01,
-9.0096e-01, 1.7000e-02, -1.5319e+00, 2.0826e-01, -7.1982e-01,
4.6724e-01, -4.0060e-01]], device='cuda:0'), 'speech_token': tensor([[ 721, 236, 93, 113, 113, 66, 66, 717, 113, 113, 66, 66,
66, 66, 66, 66, 66, 659, 659, 222, 339, 331, 322, 2280,
292, 562, 2644, 3539, 3539, 202, 69, 270, 298, 298, 626, 187,
903, 234, 175, 1354, 323, 323, 323, 154, 378, 378, 95, 84,
55, 593, 348, 468, 3605, 690, 112, 284, 4022, 59, 277, 1479,
1065, 2592, 263, 263, 432, 608, 2386, 226, 110, 110, 289, 590,
649, 51, 1479, 1065, 1089, 334, 334, 1089, 1089, 67, 2057, 3055,
539, 612, 419, 497, 2592, 263, 556, 1930, 1354, 468, 1307, 690,
728, 3945, 1596, 1596, 3945, 201, 298, 493, 253, 55, 1942, 2723,
363, 234, 175, 175, 56, 56, 167, 378, 277, 277, 1479, 2592,
274, 274, 51, 51, 889, 656, 824, 65, 471, 3539, 202, 261,
4085, 364, 3276, 432, 254, 95, 695, 695, 715, 269, 269, 2723,
56, 245, 59, 579, 1596, 406, 406, 406, 420, 1104, 123, 193,
322, 193, 292, 540, 676, 275, 188, 507, 20, 1104, 41, 376,
664, 523, 437, 437, 22, 695, 122, 296, 57, 713, 713, 601,
742, 591, 1593, 275, 188, 484, 484, 2782, 275, 177, 260, 439,
596, 124, 282, 558, 665, 250, 250, 346, 497, 384, 92, 51,
274, 608, 340, 342, 702, 903, 363, 234, 175, 173, 173, 483,
1930, 30, 133, 250, 6, 2592, 5, 448, 225, 214, 3605, 557,
286, 167, 135, 302, 608, 608, 464, 117, 1191, 26, 331, 331,
113, 113, 66, 66, 66, 66, 93, 66, 331, 66, 331, 66,
113, 113, 717]], device='cuda:0'), 'speech_feat': tensor([[[ -4.6863, -6.6239, -7.0651, ..., -10.7300, -10.8600, -10.8074],
[ -4.6294, -6.7029, -7.3860, ..., -10.6501, -10.9809, -11.2505],
[ -4.5679, -6.5523, -7.2109, ..., -10.6648, -11.0300, -11.3344],
...,
[ -5.7137, -6.7451, -7.2342, ..., -11.5129, -11.5129, -11.5129],
[ -6.2658, -6.7849, -7.4820, ..., -11.3821, -11.5045, -11.5129],
[ -5.7583, -6.0757, -7.2771, ..., -11.3052, -11.1713, -11.5129]]],
device='cuda:0')}, '粤语女': {'embedding': tensor([[-0.1969, -0.1435, -0.0468, 0.2112, -0.1789, -1.8323, -0.9693, 0.4591,
0.8763, 0.5482, 0.5223, -0.1915, -1.0553, -0.2993, 0.4395, -2.4448,
0.9848, 0.4474, 0.1033, -0.5681, -1.9561, 0.2100, 0.2141, -0.5670,
-1.0424, -0.3662, 1.2050, 1.3377, -0.9223, 1.5488, 0.1558, 0.1659,
-0.6821, 0.2471, 0.5928, -2.5487, -1.5112, -0.9493, -0.3737, 0.1702,
1.8489, -0.3798, -0.2190, -0.6872, -1.1834, -0.6712, 1.0532, -0.0190,
2.2731, -0.3706, -0.1694, -2.3571, -0.2222, 0.8383, -0.5220, -0.1608,
1.0763, 0.4311, 0.3533, -0.9165, -1.1992, -0.0883, 1.2301, 0.1274,
-0.1058, -1.1235, -0.2592, -0.7906, 0.0557, 0.9510, 0.1903, 1.4673,
-0.0257, -0.0436, -1.8494, -0.0210, -0.2197, -1.2701, -0.0939, 0.8600,
1.4706, 0.7247, -1.3012, 1.4704, 1.3117, -0.1701, 1.5219, -0.6960,
-2.1877, -1.3771, 1.3182, 0.9956, 0.0834, 0.4926, 0.1783, -0.1478,
-0.5444, 0.1503, 1.6087, 1.1256, 0.6545, -0.1216, 0.2156, 1.8215,
-0.2343, -0.6883, 0.3137, 1.2115, 0.3568, -0.7127, -0.8973, -1.9232,
-0.4944, 0.8579, -0.2224, 0.2643, -1.1702, 0.4519, 0.8602, 1.8224,
-0.7389, -1.4205, 1.5533, -0.6394, -0.6742, -0.6862, -0.4629, 0.3285,
0.5873, -1.3729, 0.9474, -0.0762, -0.0346, -0.1638, 0.1082, 0.2504,
1.0158, -1.2255, -1.5530, -0.6329, 0.6331, -0.5957, -0.8390, -1.1768,
-0.2724, -1.5437, -0.5866, 0.3517, 1.6734, -1.1675, -0.7558, 0.3115,
0.5257, -0.2852, 1.3183, -0.8656, 0.3889, -1.9070, -0.0390, 1.3818,
0.6150, -0.5260, -0.0301, -2.0680, -1.2278, 0.1366, 0.2305, -0.5451,
-1.5646, 0.4323, -0.2363, -0.1353, 0.8171, -0.4531, 0.4188, 0.3738,
-0.3860, 0.1857, -0.8900, 0.5536, -0.4883, -0.3649, 2.0771, 0.5843,
1.2069, 2.2535, -1.4702, -1.3866, -1.8340, -0.1048, 1.7880, 0.5300]],
device='cuda:0'), 'speech_token': tensor([[ 117, 632, 561, 561, 561, 491, 730, 730, 730, 730, 66, 66,
66, 730, 658, 658, 658, 675, 13, 376, 376, 376, 722, 1401,
44, 3406, 3966, 600, 114, 597, 141, 318, 104, 247, 281, 44,
87, 476, 1923, 3350, 388, 388, 199, 523, 523, 523, 523, 56,
183, 183, 515, 506, 3052, 323, 56, 332, 1037, 1037, 84, 342,
584, 292, 653, 352, 1006, 271, 523, 437, 298, 298, 3406, 581,
97, 569, 59, 1405, 437, 437, 84, 715, 446, 533, 10, 3836,
706, 2437, 569, 2644, 3276, 3276, 3276, 3276, 3276, 564, 892, 24,
209, 609, 1423, 489, 106, 1357, 23, 23, 73, 23, 2188, 504,
352, 892, 22, 432, 122, 252, 420, 12, 694, 1935, 67, 4030,
15, 341, 314, 563, 536, 503, 470, 87, 476, 283, 551, 2989,
721, 298, 460, 381, 402, 524, 3950, 154, 2645, 3823, 2828, 341,
1, 2031, 39, 2723, 468, 269, 353, 6, 685, 685, 66, 66,
66, 66, 66, 66, 331, 331, 331, 1091, 132, 742, 574, 286,
475, 557, 328, 629, 466, 98, 600, 2188, 504, 141, 408, 217,
3539, 601, 69, 503, 2592, 98, 382, 600, 2362, 513, 3605, 551,
247, 281, 100, 2592, 584, 238, 612, 223, 312, 579, 523, 523,
523, 633, 3836, 3836, 224, 150, 2644, 15, 338, 741, 84, 437,
376, 253, 420, 298, 420, 180, 85, 144, 685, 66, 508, 508,
658, 685, 373, 230, 230, 408, 408, 197, 475, 557, 242, 242,
242, 368, 451, 690, 286, 745, 223, 2437, 441, 361, 2592, 211,
292, 290, 448, 3132, 2514, 56, 378, 277, 686, 384, 528, 126,
553, 512, 672, 471, 550, 889, 889, 590, 59, 462, 556, 493,
493, 298, 420, 201, 701, 663, 369, 331, 66, 66, 66, 66,
66, 66, 66, 66, 66, 66, 66, 66, 66, 717]],
device='cuda:0'), 'speech_feat': tensor([[[-11.5129, -11.5129, -11.5129, ..., -11.5129, -11.5129, -11.5129],
[-11.5129, -11.5129, -11.5129, ..., -11.5129, -11.5129, -11.5129],
[-11.5129, -11.5129, -11.5129, ..., -11.5129, -11.5129, -11.5129],
...,
[-11.5129, -11.5129, -11.5129, ..., -11.5129, -11.5129, -11.5129],
[-11.5129, -11.5129, -11.5129, ..., -11.5129, -11.5129, -11.5129],
[-11.5129, -11.5129, -11.5129, ..., -11.5129, -11.5129, -11.5129]]],
device='cuda:0')}, '英文女': {'embedding': tensor([[-1.4556, -1.3013, -0.3299, -0.4263, 0.6739, -0.5690, -0.7223, -0.3618,
-0.6770, 0.8135, 0.4935, 0.4375, 0.5491, -0.2415, 0.7385, 0.2287,
1.0232, -0.1263, -1.7717, -0.1396, -0.1710, -0.6737, 1.8385, 0.2327,
-1.0090, 1.2143, -0.4867, -0.0675, -0.6182, 1.0131, -0.3552, -0.9996,
0.8581, 0.7129, -1.5999, -1.2446, 0.4725, -1.6447, 0.5419, 0.5807,
-0.7352, -0.4161, -0.5054, 1.9792, -2.4218, 1.4045, 1.8349, -0.4315,
-1.3391, 1.5637, 1.1627, -0.8678, 0.5302, 0.0382, 1.6232, -0.4335,
0.5422, 0.1175, -0.6774, 0.2484, -0.8455, 0.6560, -0.5422, 1.6432,
-2.9410, -0.0230, 0.4893, -0.0417, -0.1740, 0.5300, 0.2378, 1.4690,
-0.6861, 1.4702, -2.0491, 1.3436, -0.3719, 0.7002, -1.2961, 0.3317,
-0.4353, -0.1195, 0.1552, 0.2309, -0.6221, -1.1655, -0.0923, -0.5683,
-0.4412, 0.6325, 0.4848, 1.2498, -0.1350, 0.3888, -0.5797, -1.0550,
-0.6502, 0.2681, 1.0384, -0.4460, 0.1237, -0.8851, 0.2679, 0.1724,
-0.3117, -1.0035, -0.5544, -0.8458, 0.3724, 1.1379, -1.2626, 2.2689,
-0.7179, -0.3757, -0.5575, 0.1028, 1.3534, -0.5462, -0.0298, 0.7662,
0.9525, 1.0099, 0.9511, -1.5990, -0.9701, 1.5835, -0.2195, -0.7667,
-1.1735, -1.3060, 0.3632, -0.3092, -0.7741, 0.4087, -1.2942, 1.6960,
0.0501, -1.3519, 0.2984, -0.0199, 0.2493, 0.4421, -0.7375, -0.3596,
0.5879, 0.6165, -0.7374, -0.4900, 0.0428, -0.0174, -0.2094, -1.5255,
-1.0162, 0.9244, -0.6027, -1.9240, -0.4226, -0.2336, 0.0605, -0.1652,
1.2383, 0.2921, -2.1344, -1.7786, 1.3701, 0.5320, -0.8170, 1.3863,
0.2675, -0.9943, -0.1598, 2.0528, 1.3806, 1.2134, -1.0226, 1.7294,
0.0431, -0.2445, 0.7839, 1.0050, -0.4569, -0.3267, 0.5031, 0.7904,
2.1394, 0.7950, -0.5983, 1.1351, 0.7460, -0.5474, -0.6163, 1.1376]],
device='cuda:0'), 'speech_token': tensor([[ 158, 1651, 658, 195, 668, 2437, 4022, 471, 258, 737, 737, 737,
737, 554, 554, 548, 548, 199, 415, 384, 289, 456, 189, 1089,
584, 12, 12, 345, 141, 335, 16, 620, 3539, 3945, 65, 298,
298, 165, 514, 551, 551, 499, 641, 641, 36, 396, 42, 412,
742, 190, 177, 15, 739, 739, 739, 338, 374, 556, 1938, 86,
86, 399, 489, 117, 631, 1091, 425, 123, 193, 600, 3966, 439,
469, 468, 468, 1531, 596, 391, 407, 167, 86, 1938, 1228, 396,
189, 823, 456, 167, 326, 181, 186, 525, 409, 409, 80, 198,
114, 682, 639, 392, 382, 418, 292, 205, 1531, 157, 242, 388,
388, 551, 1307, 3406, 484, 484, 408, 921, 442, 491, 508, 221]],
device='cuda:0'), 'speech_feat': tensor([[[ -6.6001, -6.3639, -6.7548, ..., -9.6119, -9.8227, -10.1639],
[ -6.5760, -6.2447, -6.1768, ..., -9.5864, -9.7566, -10.0724],
[ -6.4362, -5.9555, -6.5979, ..., -9.4161, -9.9756, -10.1819],
...,
[ -6.8824, -6.3123, -6.0811, ..., -9.6549, -9.8115, -10.3965],
[ -6.3582, -6.8269, -6.8885, ..., -9.6664, -9.7302, -10.2607],
[ -6.1423, -6.7846, -6.4034, ..., -9.4011, -9.6978, -10.0100]]],
device='cuda:0')}, '英文男': {'embedding': tensor([[-6.4719e-01, -7.1094e-01, -2.2881e-01, 1.3874e+00, 8.2008e-01,
4.8295e-01, -5.9931e-01, -6.1875e-01, 9.9403e-01, -5.8049e-01,
1.1247e-01, -7.9185e-01, 2.6493e-01, -8.3107e-01, -5.5452e-01,
-8.5718e-01, 2.1137e-01, 8.7637e-01, 7.5762e-01, -8.1615e-01,
1.3638e-01, -2.1305e+00, 7.7112e-01, -3.9093e-01, -1.3988e+00,
5.6536e-01, -1.5327e+00, 3.8509e-01, -5.5981e-01, 6.8568e-01,
4.2675e-01, -4.7077e-01, 3.4126e-01, 5.0763e-01, 6.1105e-01,
-8.5112e-01, -5.5386e-01, 9.2686e-01, -4.0160e-01, 2.6848e-01,
-4.0465e-01, 9.8235e-01, -1.5106e+00, 1.2429e+00, 9.9345e-01,
-1.4952e+00, 4.5787e-01, -3.9413e-01, 1.1567e-01, 1.0856e+00,
2.0915e-02, -8.8601e-01, 3.4121e-01, -1.8011e-01, 7.0075e-01,
9.1641e-01, 2.3630e+00, -6.0352e-01, -1.7904e+00, 1.8500e-01,
1.0219e-02, 1.5213e+00, -1.3812e+00, -4.2260e-01, -1.2729e+00,
3.3842e-01, 1.0142e+00, -4.6411e-01, -1.2828e-01, -4.0485e-01,
-9.0776e-01, -4.7940e-01, -1.1606e+00, -2.1256e-01, -5.0234e-01,
4.2317e-01, -2.6833e-01, -2.5672e+00, 7.4133e-01, 1.4632e+00,
4.2950e-01, 2.8933e-01, -1.1250e+00, -1.5428e+00, -1.9553e-01,
-9.1382e-01, 2.4734e-01, -4.0098e-01, 2.4457e-01, -9.0906e-01,
-1.0248e+00, 1.2235e+00, -6.4356e-02, 1.5575e-01, 1.5667e+00,
-1.0697e+00, -1.4210e+00, 7.4060e-01, 1.5086e+00, -4.4081e-01,
1.7145e-01, 1.8446e-01, 8.6810e-01, 6.4559e-01, -6.0665e-01,
-6.6244e-01, -2.2629e-01, 3.9117e+00, 2.1379e+00, 1.8273e+00,
-1.3816e+00, 1.2950e-01, -1.9059e+00, -2.6882e+00, -3.7000e-01,
5.1291e-01, -4.9078e-02, 1.0519e+00, -4.6783e-01, 3.2123e-01,
-6.7206e-01, 4.2309e-01, -9.4660e-03, 5.7700e-01, 1.8402e-01,
4.9386e-01, -2.9039e-01, -9.6920e-02, 2.6475e-01, -9.1313e-01,
-3.2732e-01, 5.5973e-01, -8.2432e-02, 1.3699e+00, 6.8342e-01,
1.0146e+00, 1.6304e+00, -2.0010e+00, 1.2307e-03, -1.6830e-01,
-1.4833e+00, -1.8005e-02, -1.6665e-01, -1.8714e-01, -8.6757e-01,
1.4004e+00, -2.0489e-01, 2.7431e-02, -2.3862e-01, 7.9429e-01,
-1.4454e+00, -1.1812e+00, -2.6246e-02, -1.0379e+00, 1.4085e+00,
-1.2108e+00, 2.7138e-01, -1.6849e+00, -1.1831e+00, -7.6286e-01,
-2.3408e-01, 3.8747e-01, -1.4353e+00, -1.2320e+00, -1.1137e+00,
1.8655e+00, -1.0418e+00, -2.0211e+00, -7.3341e-02, -3.9539e-01,
8.0678e-02, -5.0632e-01, 1.8988e-01, -1.9242e+00, -1.3309e+00,
2.9146e-02, -5.2176e-01, -1.8427e+00, -7.9164e-01, -6.0876e-01,
-1.1789e-01, -1.0538e+00, 1.0737e+00, 1.2448e-01, -2.3428e-01,
-3.1194e-01, -1.5872e+00, -1.1111e+00, 1.0814e+00, -9.2491e-01,
6.2495e-01, -1.2366e+00]], device='cuda:0'), 'speech_token': tensor([[3539, 334, 678, 240, 88, 160, 1519, 179, 676, 2299, 3, 42,
1091, 309, 67, 335, 214, 484, 1519, 629, 629, 1923, 419, 892,
258, 732, 48, 174, 174, 174, 415, 1381, 289, 608, 429, 194,
194, 194, 522, 46, 46, 3889, 719, 570, 260, 569, 737, 737,
48, 1930, 331, 165, 165, 165, 731, 411, 1073, 467, 54, 54,
1596, 1596, 535, 181, 186, 606, 537, 209, 729, 315, 455, 447,
447, 337, 271, 372, 649, 649, 590, 623, 119, 623, 528, 441,
479, 340, 750, 189, 630, 157, 699, 95, 370, 1354, 483, 483,
457, 240, 1988, 6, 220, 2188, 1529, 387, 2437, 1460, 15, 297,
3290, 177, 2503, 585, 557, 159, 729, 478, 532, 576, 477, 354,
409, 110, 79, 56, 557, 557, 690, 498, 533, 532, 414, 738,
257, 1214, 376, 535, 437, 192, 1479, 415, 51, 2185, 223, 297,
676, 1397, 100, 50, 173, 540, 674, 321, 123, 419, 603, 603,
99, 721, 65, 653, 4013, 4013, 217, 217, 606, 411, 411, 411,
416, 1206, 409, 409, 326, 628, 346, 269, 194, 2362, 441, 441,
441, 2362, 2992, 3254, 194, 292, 1531, 2514, 445, 3, 42, 1091,
425, 706, 511, 511, 198, 740, 612, 612, 257, 597, 455, 4,
3254, 903, 386, 621, 731, 411, 411, 1206, 351, 408, 408, 408,
647, 647, 647, 647, 507, 2844, 3290, 446, 3108, 376, 682, 3094,
98, 565, 371, 709, 80, 409, 1206, 422, 172, 269, 172, 1531,
479, 479, 260, 297, 355, 355, 141, 673, 299, 23, 3006, 23,
23, 299, 299, 299, 131, 921, 2130, 2130, 2130, 717]],
device='cuda:0'), 'speech_feat': tensor([[[ -8.1229, -8.2229, -8.6592, ..., -11.5129, -11.5129, -11.5129],
[ -8.3514, -8.2158, -9.0726, ..., -11.1713, -11.5129, -11.5129],
[ -8.9071, -8.4114, -8.4058, ..., -11.3321, -11.5129, -11.5129],
...,
[ -8.9930, -8.6342, -8.2628, ..., -10.9315, -11.2630, -11.4201],
[ -8.8316, -8.1066, -8.1131, ..., -11.0446, -11.5129, -11.5129],
[ -9.1211, -8.5437, -8.7552, ..., -11.3651, -11.5129, -11.5129]]],
device='cuda:0')}, '韩语女': {'embedding': tensor([[ 0.3559, 0.0075, 0.7824, -1.3229, -0.4104, -0.5673, 0.7463, 0.2918,
0.0501, 0.6650, 0.0554, -0.6958, 0.6234, 1.0058, 0.1716, 0.7261,
0.4176, -1.5641, 0.2550, 0.8561, -1.0421, -1.1070, 0.9912, -0.3048,
0.8053, 1.2477, 0.2118, 0.4926, -0.2253, 0.4106, -1.1353, 0.2981,
-0.5120, -1.3426, -0.4672, -0.9174, -0.8926, 0.3541, -0.3001, 2.0379,
-0.5697, 0.4150, -0.6529, -0.5269, 0.1288, 0.3733, -0.7477, 0.4031,
0.1324, 0.7907, 0.2212, -0.5951, 0.3474, -0.3024, -0.0577, -0.6939,
2.2824, -1.2562, 0.4734, 0.2147, 0.1676, 0.3042, -1.3242, 0.6650,
-2.0095, 1.6330, -0.8184, -0.3007, 0.7152, 0.8885, 0.3289, 1.9745,
-0.4467, 0.0587, -1.5234, 0.3085, -0.0238, -0.7412, 0.7878, 1.7830,
-0.4795, 0.7525, 0.7809, 0.2657, 0.6756, 1.1625, 0.4655, -1.1271,
-1.8302, -0.9431, -0.1690, 0.0662, -0.6112, -0.6476, -0.0378, -0.6226,
-0.9118, -2.3297, -0.0817, 0.5703, 0.1067, -0.3166, 0.3982, 1.1134,
-0.3720, 0.5518, 0.6592, 0.5218, -1.2867, 1.3264, -0.8759, 0.5806,
-0.0355, -0.1546, 0.8875, -0.5831, 0.0310, 0.7176, 0.0167, -1.2911,
0.6448, -1.1088, 1.6829, -2.2430, -1.2822, 0.6450, -0.2556, -0.2339,
0.4436, -0.5160, -0.0071, -0.9674, 0.8554, 1.1653, 1.2980, 1.5738,
-0.1621, -0.4692, 1.0122, -0.5182, 2.1998, -1.3604, -1.3553, -0.7227,
-0.1674, 0.0611, -1.0978, -0.6428, 0.3869, -0.9169, -1.5022, -1.8399,
-1.0636, -0.6494, -1.3622, -1.4425, -0.1470, 0.2533, 0.2820, -0.6465,
-0.5298, -1.1177, -0.6846, -1.6116, -0.1288, 1.2980, 0.0981, 1.4555,
-0.2777, 0.5467, 1.0632, -0.8862, 0.1383, -0.9257, -0.0537, 1.2740,
0.6180, -0.1066, 0.2609, 0.5594, -0.7424, -0.5614, -1.0667, 0.9445,
-0.0257, 0.3918, -1.2326, 0.4544, -0.5121, -0.5960, -1.6542, 0.9597]],
device='cuda:0'), 'speech_token': tensor([[ 745, 454, 1651, 1651, 1651, 1651, 730, 730, 730, 730, 730, 730,
730, 730, 730, 730, 730, 730, 730, 730, 658, 658, 658, 658,
658, 192, 3254, 263, 274, 58, 126, 59, 59, 364, 22, 307,
270, 253, 403, 714, 243, 2437, 112, 50, 50, 50, 483, 403,
719, 292, 653, 217, 202, 674, 96, 309, 317, 540, 540, 173,
173, 1354, 3158, 124, 124, 189, 220, 203, 468, 157, 157, 823,
596, 2057, 244, 157, 348, 1519, 283, 318, 366, 1089, 1261, 600,
357, 340, 228, 177, 644, 644, 92, 548, 1065, 149, 149, 415,
21, 413, 605, 605, 605, 63, 48, 48, 1381, 311, 609, 489,
85, 435, 1849, 1849, 19, 120, 468, 468, 351, 130, 1091, 1089,
450, 106, 584, 2188, 597, 141, 603, 352, 721, 1206, 419, 350,
124, 124, 558, 558, 2437, 4022, 644, 190, 527, 594, 594, 683,
259, 1930, 499, 88, 232, 38, 608, 277, 563, 130, 1091, 1091,
1091, 1091, 358, 706, 473, 427, 79, 453, 284, 2299, 118, 1382,
118, 590, 267, 267, 271, 517, 530, 443, 530, 436, 463, 440,
440, 440, 440, 440, 440, 440, 2444, 436, 436, 221, 710, 1091,
1988, 274, 2691, 337, 389, 390, 376, 41, 253, 145, 54, 54,
65, 422, 551, 283, 283, 88, 69, 69, 270, 337, 714, 597,
141, 408, 634, 3605, 3605, 3254, 733, 665, 151, 1930, 569, 423,
54, 3108, 467, 387, 892, 540, 173, 173, 1930, 346, 497, 665,
433, 4013, 512, 1460, 644, 742, 591, 731, 30, 391, 750, 124,
1261, 12, 504, 141, 519, 716, 750, 1091, 1089, 1091, 358, 317,
374, 260, 15, 341, 57, 2297, 55, 2297, 593, 1700, 348, 348,
348, 157, 157, 86, 137, 1942, 3821, 3821, 3821, 2057, 340, 488,
512, 1460, 2185, 130, 3821, 3821, 717, 602, 1091, 425, 67, 603,
601, 158, 130, 42, 98, 546, 488, 217, 3539, 660, 44, 460,
406, 406, 87, 87, 348, 33, 1531, 2057, 309, 292, 57, 115,
15, 134, 134, 134, 426, 394, 364, 4013, 433, 117, 685, 685,
730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730,
730, 730, 730, 66, 66, 66, 331, 331, 331, 717]],
device='cuda:0'), 'speech_feat': tensor([[[-11.5129, -11.5129, -11.5129, ..., -11.5129, -11.5129, -11.5129],
[-11.5129, -11.5129, -11.5129, ..., -11.5129, -11.5129, -11.5129],
[-11.5129, -11.5129, -11.5129, ..., -11.5129, -11.5129, -11.5129],
...,
[-11.5129, -11.5129, -11.5129, ..., -11.5129, -11.5129, -11.5129],
[-11.5129, -11.5129, -11.5129, ..., -11.5129, -11.5129, -11.5129],
[-11.5129, -11.5129, -11.5129, ..., -11.5129, -11.5129, -11.5129]]],
device='cuda:0')}}