Commit 9425add
funasr1.0 update
LauraGPT committed Jan 22, 2024
1 parent 37d7764 commit 9425add
Showing 11 changed files with 16 additions and 11 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -93,7 +93,7 @@ from funasr import AutoModel
# use vad, punc, spk or not as you need
model = AutoModel(model="paraformer-zh", model_revision="v2.0.2",
vad_model="fsmn-vad", vad_model_revision="v2.0.2",
punc_model="ct-punc-c", punc_model_revision="v2.0.2",
punc_model="ct-punc-c", punc_model_revision="v2.0.3",
# spk_model="cam++", spk_model_revision="v2.0.2",
)
res = model.generate(input=f"{model.model_path}/example/asr_example.wav",
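Note: the only change in this hunk is the punc_model_revision bump to v2.0.3. For reference, a minimal runnable sketch of the updated README snippet (the remaining generate arguments are truncated in this view and therefore omitted here):

from funasr import AutoModel

# Sketch of the README usage after this commit; only punc_model_revision changes (v2.0.2 -> v2.0.3).
model = AutoModel(model="paraformer-zh", model_revision="v2.0.2",
                  vad_model="fsmn-vad", vad_model_revision="v2.0.2",
                  punc_model="ct-punc-c", punc_model_revision="v2.0.3",
                  # spk_model="cam++", spk_model_revision="v2.0.2",
                  )
res = model.generate(input=f"{model.model_path}/example/asr_example.wav")
print(res)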
2 changes: 1 addition & 1 deletion README_zh.md
@@ -89,7 +89,7 @@ from funasr import AutoModel
# use vad, punc, spk or not as you need
model = AutoModel(model="paraformer-zh", model_revision="v2.0.2",
vad_model="fsmn-vad", vad_model_revision="v2.0.2",
punc_model="ct-punc-c", punc_model_revision="v2.0.2",
punc_model="ct-punc-c", punc_model_revision="v2.0.3",
# spk_model="cam++", spk_model_revision="v2.0.2",
)
res = model.generate(input=f"{model.model_path}/example/asr_example.wav",
(another changed file; name not shown in this view)
@@ -10,7 +10,7 @@
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
vad_model_revision="v2.0.2",
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
- punc_model_revision="v2.0.2",
+ punc_model_revision="v2.0.3",
spk_model="damo/speech_campplus_sv_zh-cn_16k-common",
spk_model_revision="v2.0.2",
)
(another changed file; name not shown in this view)
@@ -4,7 +4,7 @@ model_revision="v2.0.2"
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
vad_model_revision="v2.0.2"
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
- punc_model_revision="v2.0.2"
+ punc_model_revision="v2.0.3"
spk_model="damo/speech_campplus_sv_zh-cn_16k-common"
spk_model_revision="v2.0.2"

(another changed file; name not shown in this view)
@@ -10,7 +10,7 @@
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
vad_model_revision="v2.0.2",
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
- punc_model_revision="v2.0.2",
+ punc_model_revision="v2.0.3",
spk_model="damo/speech_campplus_sv_zh-cn_16k-common",
spk_model_revision="v2.0.2"
)
(another changed file; name not shown in this view)
@@ -4,7 +4,7 @@ model_revision="v2.0.2"
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
vad_model_revision="v2.0.2"
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
- punc_model_revision="v2.0.2"
+ punc_model_revision="v2.0.3"
spk_model="damo/speech_campplus_sv_zh-cn_16k-common"
spk_model_revision="v2.0.2"

7 changes: 6 additions & 1 deletion examples/industrial_data_pretraining/paraformer/demo.py
@@ -5,7 +5,12 @@

from funasr import AutoModel

- model = AutoModel(model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch", model_revision="v2.0.2")
+ model = AutoModel(model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch", model_revision="v2.0.3",
+                   # vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
+                   # vad_model_revision="v2.0.2",
+                   # punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
+                   # punc_model_revision="v2.0.3",
+                   )

res = model.generate(input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav")
print(res)
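A sketch of the updated demo with the commented-out options switched on; the model names and revisions are taken from the diff above, but enabling them this way is an assumption rather than part of the commit:

from funasr import AutoModel

# Same demo, with the optional VAD and punctuation models enabled
# using the revisions shown in the commented-out lines above.
model = AutoModel(model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
                  model_revision="v2.0.3",
                  vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
                  vad_model_revision="v2.0.2",
                  punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
                  punc_model_revision="v2.0.3",
                  )

res = model.generate(input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav")
print(res)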
(another changed file; name not shown in this view)
@@ -10,7 +10,7 @@
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
vad_model_revision="v2.0.2",
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
- punc_model_revision="v2.0.2",
+ punc_model_revision="v2.0.3",
spk_model="damo/speech_campplus_sv_zh-cn_16k-common",
spk_model_revision="v2.0.2",
)
(another changed file; name not shown in this view)
@@ -4,7 +4,7 @@ model_revision="v2.0.2"
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
vad_model_revision="v2.0.2"
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
- punc_model_revision="v2.0.2"
+ punc_model_revision="v2.0.3"

python funasr/bin/inference.py \
+model=${model} \
2 changes: 1 addition & 1 deletion funasr/auto/auto_model.py
@@ -391,7 +391,7 @@ def inference_with_vad(self, input, input_len=None, **cfg):
if self.punc_model is not None:
self.punc_kwargs.update(cfg)
punc_res = self.inference(result["text"], model=self.punc_model, kwargs=self.punc_kwargs, **cfg)
result["text_with_punc"] = punc_res[0]["text"]
result["text"] = punc_res[0]["text"]

# speaker embedding cluster after resorted
if self.spk_model is not None:
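The practical effect of this one-line change: the punctuated text now overwrites result["text"] instead of being returned under a separate "text_with_punc" key, so callers that previously read "text_with_punc" need to switch to "text". A minimal caller-side sketch, assuming a model constructed with the VAD and punctuation options as in the hunks above; the list-of-dicts result layout is inferred from this hunk:

# Caller-side sketch of the new behavior.
res = model.generate(input=f"{model.model_path}/example/asr_example.wav")

for result in res:
    # Before this commit the punctuated text lived in result["text_with_punc"];
    # after it, result["text"] is already punctuated.
    print(result["text"])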
2 changes: 1 addition & 1 deletion funasr/models/paraformer/model.py
@@ -451,7 +451,7 @@ def inference(self,
self.nbest = kwargs.get("nbest", 1)

meta_data = {}
- if isinstance(data_in, torch.Tensor): # fbank
+ if isinstance(data_in, torch.Tensor) and kwargs.get("data_type", "sound") == "fbank": # fbank
speech, speech_lengths = data_in, data_lengths
if len(speech.shape) < 3:
speech = speech[None, :, :]
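This change narrows the fbank branch: a torch.Tensor input is treated as precomputed fbank features only when data_type is explicitly "fbank" (the default stays "sound"), so raw waveform tensors now fall through to the regular frontend. A small standalone sketch of the condition itself, not of the FunASR API; the tensor shape and feature dimension are illustrative assumptions:

import torch

data_in = torch.randn(100, 560)      # illustrative tensor input (frames x feat_dim)
kwargs = {}                          # no data_type given, so the default "sound" applies

if isinstance(data_in, torch.Tensor) and kwargs.get("data_type", "sound") == "fbank":
    print("treated as precomputed fbank features")
else:
    print("treated as raw audio; fbank will be extracted by the frontend")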
