Skip to content

Commit

Permalink
Merge pull request #244 from JohnSnowLabs/514/better-exception-handling-for-bad-inputs
Browse files Browse the repository at this point in the history

better exception handling for QA models when input data not correct format
  • Loading branch information
C-K-Loan authored Jan 29, 2024
2 parents e45c075 + beb9179 commit 7e16df2
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 5 deletions.
14 changes: 11 additions & 3 deletions nlu/pipe/utils/data_conversion_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,25 @@
from pyspark.sql.types import StringType, StructType, StructField


class NluDataParseException(Exception):
    """Raised when NLU cannot parse the supplied input data.

    Carries a human-readable description of the parsing failure so callers
    can distinguish data-format problems from other runtime errors.
    """

    def __init__(self, message="An error occurred parsing data with NLU"):
        # Expose the message as an attribute so callers can inspect it directly.
        self.message = message
        super().__init__(message)

class DataConversionUtils:
# Modin aswell but optional, so we dont import the type yet
supported_types = [pyspark.sql.DataFrame, pd.DataFrame, pd.Series, np.ndarray]

@staticmethod
def except_text_col_not_found(cols):
raise ValueError(
raise NluDataParseException(
f'Could not find column named "text" in input Pandas Dataframe. Please ensure one column named such exists. Columns in DF are : {cols} ')

@staticmethod
def except_invalid_question_data_format(cols):
raise ValueError(
raise NluDataParseException(
f'You input data format is invalid for question answering with span classification.'
f'Make sure you have at least 2 columns in you dataset, named context/question for pandas Dataframes'
f'For Strings/Iterables/Tuples make sure to use the format `question|||context` or (question,context) ')
Expand Down Expand Up @@ -301,7 +308,6 @@ def to_spark_df(data, spark_sess, raw_text_column='text', is_span_data=False, is
# TODO invalid Table Data Format Exception
pass
if isinstance(data[0], str):

return DataConversionUtils.table_question_str_to_sdf(data, spark_sess)
if isinstance(data[0], pd.DataFrame):
return DataConversionUtils.table_question_pdf_to_sdf(data, spark_sess)
Expand All @@ -321,6 +327,8 @@ def to_spark_df(data, spark_sess, raw_text_column='text', is_span_data=False, is
return DataConversionUtils.question_tuple_iterable_to_sdf(data, spark_sess)
elif isinstance(data[0], str):
return DataConversionUtils.question_str_iterable_to_sdf(data, spark_sess)
except NluDataParseException as err :
raise err
except:
ValueError("Data could not be converted to Spark Dataframe for internal conversion.")
else:
Expand Down
6 changes: 4 additions & 2 deletions nlu/pipe/utils/predict_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from sparknlp.common import AnnotatorType

from nlu.pipe.utils.audio_data_conversion_utils import AudioDataConversionUtils
from nlu.pipe.utils.data_conversion_utils import DataConversionUtils
from nlu.pipe.utils.data_conversion_utils import DataConversionUtils, NluDataParseException
from nlu.pipe.utils.ocr_data_conversion_utils import OcrDataConversionUtils

logger = logging.getLogger('nlu')
Expand Down Expand Up @@ -364,12 +364,14 @@ def __predict__(pipe, data, output_level, positions, keep_stranger_features, met
try:
return __predict_standard_spark(pipe, data, output_level, positions, keep_stranger_features, metadata,
drop_irrelevant_cols, return_spark_df, get_embeddings)
except NluDataParseException as err:
logger.warning(f"Predictions Failed={err}")
raise err
except Exception as err:
logger.warning(f"Predictions Failed={err}")
pipe.print_exception_err(err)
raise Exception("Failure to process data with NLU")


def debug_print_pipe_cols(pipe):
    """Print one line per pipeline component: its Spark input columns,
    component name, and Spark output columns, joined by '->'."""
    for component in pipe.components:
        summary = f'{component.spark_input_column_names}->{component.name}->{component.spark_output_column_names}'
        print(summary)

0 comments on commit 7e16df2

Please sign in to comment.