modules/text/syntactic_analysis/DDParser/module.py

# -*- coding:utf-8 -*-
import os
import argparse

import paddlehub as hub
from paddlehub.module.module import serving, moduleinfo, runnable
from paddlenlp import Taskflow


@moduleinfo(
    name="ddparser",
    version="1.1.0",
    summary="Baidu's open-source DDParser model.",
    author="baidu-nlp",
    author_email="",
    type="nlp/syntactic_analysis")
class ddparser(hub.NLPPredictionModule):
    def __init__(self,
                 tree=True,
                 prob=False, 
                 use_pos=False,
                 batch_size=1,
                 return_visual=False,
                 ):
        self.ddp = Taskflow(
            "dependency_parsing",
            tree=tree, 
            prob=prob, 
            use_pos=use_pos,
            batch_size=batch_size,
            return_visual=return_visual)

    @serving
    def serving_parse(self, texts):
        results = self.parse(texts)
        for i in range(len(results)):
            org_list = results[i]["head"]
            results[i]["head"] = [str(x) for x in org_list]
        return results

    def parse(self, texts):
        """
        parse the dependency.

        Args:
            texts(str or list[str]): the input texts to be parse.

        Returns:
            results(list[dict]): a list, with elements corresponding to each of the elements in texts. The element is a dictionary of shape:
                {
                    'word': list[str], the tokenized words.
                    'head': list[int], the head ids.
                    'deprel': list[str], the dependency relation.
                    'prob': list[float], the prediction probility of the dependency relation.
                    'postag': list[str], the POS tag. If the element of the texts is list, the key 'postag' will not return.
                    'visual' : numpy.ndarray: the dependency visualization. Use cv2.imshow to show or cv2.imwrite to save it. If return_visual=False, it will not return.
                }
       """
        return self.ddp(texts)

    @runnable
    def run_cmd(self, argvs):
        """
        Run as a command
        """
        self.parser = argparse.ArgumentParser(
            description='Run the %s module.' % self.name,
            prog='hub run %s' % self.name,
            usage='%(prog)s',
            add_help=True)

        self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required")

        self.add_module_input_arg()

        args = self.parser.parse_args(argvs)

        input_data = self.check_input_data(args)

        results = self.parse(texts=input_data)

        return results

    def visualize(self, text):
        """
        Visualize the dependency.

        Args:
            text(str): input text.

        Returns:
            data(numpy.ndarray): a numpy array, use cv2.imshow to show it or cv2.imwrite to save it.
        """

        if isinstance(text, str):
            result = self.ddp(text)[0]['visual']
            return result
        else:
            raise TypeError(
                "Invalid inputs, input text should be str, but type of {} found!".format(type(text))
            )