From 2f806711f8791c45d9ec7a004e78802545f7aa6f Mon Sep 17 00:00:00 2001 From: ebell495 Date: Wed, 6 Jul 2022 15:48:43 -0400 Subject: [PATCH] Update Fuzz Harness --- fuzz/Dockerfile | 2 +- fuzz/fuzz.py | 35 ++++++++++++++--------------------- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/fuzz/Dockerfile b/fuzz/Dockerfile index ec2cdde..d8a8ab8 100644 --- a/fuzz/Dockerfile +++ b/fuzz/Dockerfile @@ -34,7 +34,7 @@ RUN apt-get update && apt-get upgrade -y && apt-get install -y \ apt-get clean # pytest needs to be installed through pip to make sure we have a recent version -RUN pip3 install pytest atheris +RUN pip3 install pytest atheris segments dlinfo # tests expect python to be available as executable 'python' not 'python3' RUN ln -s /usr/bin/python3 /usr/bin/python diff --git a/fuzz/fuzz.py b/fuzz/fuzz.py index 63775c5..267dfdc 100644 --- a/fuzz/fuzz.py +++ b/fuzz/fuzz.py @@ -3,13 +3,8 @@ import sys import os import atheris -# old_stdout = sys.stdout # backup current stdout -# sys.stdout = open(os.devnull, "w") -with atheris.instrument_imports(): - from phonemizer.backend import EspeakBackend, FestivalBackend -# sys.stdout = old_stdout -# from phonemizer import phonemize -# from phonemizer.backend import EspeakBackend, FestivalBackend + +from phonemizer.backend import EspeakBackend, FestivalBackend # Initilize the backends separatly # Running phonemizer repeatly increases the memory usage @@ -18,21 +13,19 @@ @atheris.instrument_func def TestOneInput(data): - barray = bytearray(data) - # espeak.phonemize(str(data).split(" ")) - if len(barray) > 0: - # Choose the backend to use based on the first input byte - r = barray[0] - if r % 2 == 0: - # Make sure to remove the first byte otherwise this will only every test this backend with the first byte being even - del barray[0] - espeak.phonemize(str(data).split(' ')) - else: - del barray[0] - festival.phonemize(str(data).split(' ')) + fdp = atheris.FuzzedDataProvider(data) + + if len(data) < 1: + return + + option = fdp.ConsumeBytes(1)[0] + in_string = fdp.ConsumeUnicodeNoSurrogates(len(data)) + + if option % 2 == 0: + espeak.phonemize(in_string.split(" ")) else: - espeak.phonemize(str(data).split(' ')) - festival.phonemize(str(data).split(' ')) + festival.phonemize(in_string.split(" ")) +atheris.instrument_all() atheris.Setup(sys.argv, TestOneInput) atheris.Fuzz() \ No newline at end of file