forked from PaddlePaddle/PaddleHub
* Weibo rumor prediction * Leave note generation * Update module.py * Update module.py * Update module.py * Update README.md * Update README.md
1 parent 045e4e2 · commit 1226702
Showing 11 changed files with 1,422 additions and 0 deletions.
Rumor_prediction README.md (50 additions)
## Overview

Rumor_prediction is a model that predicts whether a statement is a rumor.

## Command-line prediction

```shell
$ hub run Rumor_prediction --input_text='兴仁县今天抢小孩没抢走,把孩子母亲捅了一刀,看见这车的注意了,真事,车牌号辽HFM055!!!!!赶紧散播! 都别带孩子出去瞎转悠了 尤其别让老人自己带孩子出去 太危险了 注意了!!!!辽HFM055北京现代朗动,在各学校门口抢小孩!!!110已经 证实!!全市通缉!!'
```

## API

```python
def Rumor(texts, use_gpu=False):
```

Prediction API that predicts whether the input statements are rumors.

**Parameters**

* texts (list\[str\]): statements to check for rumors;
* use\_gpu (bool): whether to use the GPU; **if you use the GPU, set the CUDA\_VISIBLE\_DEVICES environment variable first**;

**Returns**

* results (list\[dict\]): list of prediction results; each element is a dict with the following fields:

  - content (str): the input text
  - prediction (str): the predicted label
  - probability (float): the probability of the predicted label
**Code example**

```python
import paddlehub as hub

module = hub.Module(name="Rumor_prediction")

test_texts = ['兴仁县今天抢小孩没抢走,把孩子母亲捅了一刀,看见这车的注意了,真事,车牌号辽HFM055!!!!!赶紧散播! 都别带孩子出去瞎转悠了 尤其别让老人自己带孩子出去 太危险了 注意了!!!!辽HFM055北京现代朗动,在各学校门口抢小孩!!!110已经 证实!!全市通缉!!']
results = module.Rumor(texts=test_texts, use_gpu=True)
print(results)
```
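
The returned list can be consumed field by field. Below is a minimal sketch (not part of the module) that assumes `results` was produced by the `module.Rumor(...)` call above and uses only the fields documented under **Returns**:

```python
# Iterate the documented result fields; assumes `results` comes from
# module.Rumor(...) as in the example above.
for item in results:
    print(item['content'])
    print('prediction:', item['prediction'], 'probability:', item['probability'])
```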

### Dependencies

paddlepaddle >= 2.0.0rc1

paddlehub >= 2.0.0rc0
modules/text/text_generation/Rumor_prediction/module.py (152 additions, 0 deletions)
```python
# coding:utf-8
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import ast
import os
import math
import six
import time
from pathlib import Path

from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
from paddlehub.module.module import runnable, serving, moduleinfo
from paddlehub.io.parser import txt_parser
from paddlehub.compat.module.nlp_module import DataFormatError
import numpy as np
import paddle
import paddlehub as hub


@moduleinfo(
    name="Rumor_prediction",
    version="1.0.0",
    type="nlp/semantic_model",
    summary="Predict whether the input text is a rumor",
    author="彭兆帅,郑博培",
    author_email="[email protected],[email protected]")
class Rumorprediction(hub.Module):
    def _initialize(self):
        """
        Initialize with the necessary elements
        """
        # Path to the exported inference model
        self.default_pretrained_model_path = os.path.join(self.directory, "infer_model")

    def Rumor(self, texts, use_gpu=False):
        """
        Predict whether each input text is a rumor.

        Args:
            texts (list[str]): statements to check for rumors.
            use_gpu (bool): Whether to use gpu.
        """
        # Convert a sentence into a list of ids using the vocabulary file
        def get_data(sentence):
            # Load the vocabulary (a dict literal stored on the first line of dict.txt)
            with open(self.directory + '/dict.txt', 'r', encoding='utf-8') as f_data:
                dict_txt = eval(f_data.readlines()[0])
            dict_txt = dict(dict_txt)
            # Map each character to its id; unknown characters fall back to '<unk>'
            keys = dict_txt.keys()
            data = []
            for s in sentence:
                if s not in keys:
                    s = '<unk>'
                data.append(int(dict_txt[s]))
            return data

        data = []
        for text in texts:
            text = get_data(text)
            data.append(text)
        base_shape = [[len(c) for c in data]]
        paddle.enable_static()
        place = paddle.CUDAPlace(0) if use_gpu else paddle.CPUPlace()
        exe = paddle.static.Executor(place)
        exe.run(paddle.static.default_startup_program())
        [infer_program, feeded_var_names, target_var] = paddle.fluid.io.load_inference_model(
            dirname=self.default_pretrained_model_path, executor=exe)
        # Build the LoD tensor fed to the model
        tensor_words = paddle.fluid.create_lod_tensor(data, base_shape, place)
        # Run inference
        result = exe.run(program=infer_program,
                         feed={feeded_var_names[0]: tensor_words},
                         fetch_list=target_var)
        # Class names
        names = ['谣言', '非谣言']  # 'rumor', 'non-rumor'

        results = []

        # Pick the label with the highest probability for each input
        for i in range(len(data)):
            content = texts[i]
            lab = np.argsort(result)[0][i][-1]

            alltext = {
                'content': content,
                'prediction': names[lab],
                'probability': result[0][i][lab]
            }
            alltext = [alltext]
            results = results + alltext

        return results

    def add_module_config_arg(self):
        """
        Add the command config options
        """
        self.arg_config_group.add_argument(
            '--use_gpu',
            type=ast.literal_eval,
            default=False,
            help="whether use GPU for prediction")

    def add_module_input_arg(self):
        """
        Add the command input options
        """
        self.arg_input_group.add_argument(
            '--input_text',
            type=str,
            default=None,
            help="input_text is str")

    @runnable
    def run_cmd(self, argvs):
        """
        Run as a command
        """
        self.parser = argparse.ArgumentParser(
            description='Run the %s module.' % self.name,
            prog='hub run %s' % self.name,
            usage='%(prog)s',
            add_help=True)

        self.arg_input_group = self.parser.add_argument_group(
            title="Input options", description="Input data. Required")
        self.arg_config_group = self.parser.add_argument_group(
            title="Config options",
            description="Run configuration for controlling module behavior, optional.")

        self.add_module_config_arg()
        self.add_module_input_arg()

        args = self.parser.parse_args(argvs)
        input_text = [args.input_text]
        results = self.Rumor(texts=input_text, use_gpu=args.use_gpu)

        return results
```
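
The character-to-id lookup inside `get_data` is the core of the preprocessing above. The following standalone sketch reproduces that logic with a tiny made-up vocabulary; the real module loads its vocabulary from `dict.txt` in the module directory:

```python
# Standalone sketch of the get_data lookup: map each character to an id,
# falling back to '<unk>' for characters missing from the vocabulary.
# The toy vocabulary below is illustrative only.
vocab = {'谣': 0, '言': 1, '<unk>': 2}

def to_ids(sentence, vocab):
    return [int(vocab[ch]) if ch in vocab else int(vocab['<unk>']) for ch in sentence]

print(to_ids('谣言测试', vocab))  # -> [0, 1, 2, 2]
```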
ernie_gen_leave README.md (52 additions)
## Overview

ernie_gen_leave is a model fine-tuned from ERNIE-GEN. Its main function is to generate leave notes: given an input keyword, it produces reasons for your leave request.

## Command-line prediction

```shell
$ hub run ernie_gen_leave --input_text="理由" --use_gpu True --beam_width 5
```

## API

```python
def generate(texts, use_gpu=False, beam_width=5):
```

Prediction API that generates leave reasons from the input keywords.

**Parameters**

* texts (list\[str\]): keywords for the leave note;
* use\_gpu (bool): whether to use the GPU; **if you use the GPU, set the CUDA\_VISIBLE\_DEVICES environment variable first**;
* beam\_width: beam search width, which determines how many reasons are generated.

**Returns**

* results (list\[list\[str\]\]): the generated leave reasons.

**Code example**

```python
import paddlehub as hub

module = hub.Module(name="ernie_gen_leave")

test_texts = ["理由"]
results = module.generate(texts=test_texts, use_gpu=False, beam_width=2)
for result in results:
    print(result)
```
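
Per the return type documented above, each element of `results` corresponds to one input keyword and holds its generated candidates (up to `beam_width`). A small sketch of unpacking them, assuming `test_texts` and `results` come from the example above:

```python
# Pair each input keyword with its generated leave-note candidates.
for keyword, candidates in zip(test_texts, results):
    print('keyword:', keyword)
    for note in candidates:
        print(' -', note)
```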

## View the code

https://github.com/PaddlePaddle/PaddleHub/tree/release/v2.0.0-rc/modules/text/text_generation/ernie_gen_leave

### Dependencies

paddlepaddle >= 2.0.0rc1

paddlehub >= 2.0.0rc0