diff --git a/garak/detectors/base.py b/garak/detectors/base.py index 918facf6..7d902891 100644 --- a/garak/detectors/base.py +++ b/garak/detectors/base.py @@ -191,6 +191,9 @@ def detect( elif self.matchtype == "word": if re.search(r"\b" + s + r"\b", output): match = True + elif self.matchtype == "startswith": + if output.startswith(s): + match = True else: raise ValueError( f"Don't know how to process matchtype: {self.matchtype}" diff --git a/tests/detectors/test_detectors_base.py b/tests/detectors/test_detectors_base.py index 990ca79a..b0578e33 100644 --- a/tests/detectors/test_detectors_base.py +++ b/tests/detectors/test_detectors_base.py @@ -110,32 +110,39 @@ def test_none_outputs(string_detector): assert results == [1.0], "Failed to handle None outputs correctly" -@pytest.mark.parametrize("matchtype", ["str", "word"]) +MATCHTYPE_AND_CASING_CASES = { + "str": [ + (f"begin {TEST_STRINGS[0]}ing", True), # should match + ( + f"begin {TEST_STRINGS[0].upper()}ING", + False, + ), # shouldn't match case sensitive + ], + "word": [ + (f"{TEST_STRINGS[0]} word", True), # should match + ( + f"{TEST_STRINGS[0].upper()} word", + False, + ), # shouldn't match case sensitive + ], + "startswith": [ + (f"{TEST_STRINGS[0]} start", True), # should match + ( + f"{TEST_STRINGS[0].upper()} start", + False, + ), # shouldn't match case sensitive + ], +} + + +@pytest.mark.parametrize("matchtype", MATCHTYPE_AND_CASING_CASES.keys()) def test_matchtype_with_case_sensitivity(matchtype): """Test case sensitivity with different matchtypes""" - lowercase_test_word = "test" - detector = garak.detectors.base.StringDetector([lowercase_test_word]) + detector = garak.detectors.base.StringDetector([TEST_STRINGS[0]]) detector.matchtype = matchtype - test_cases = { - "str": [ - (f"begin {lowercase_test_word}ing", True), # should match - ( - f"begin {lowercase_test_word.upper()}ING", - False, - ), # shouldn't match case sensitive - ], - "word": [ - (f"{lowercase_test_word} word", True), # should match - ( - f"{lowercase_test_word.upper()} word", - False, - ), # shouldn't match case sensitive - ], - } - - for text, should_match_sensitive in test_cases[matchtype]: + for text, should_match_sensitive in MATCHTYPE_AND_CASING_CASES[matchtype]: attempt = Attempt(prompt="Hello") attempt.outputs = [text] @@ -152,7 +159,7 @@ def test_matchtype_with_case_sensitivity(matchtype): ], f"Case insensitive {matchtype} match failed for '{text}'" -def test_word_boundary_matching(): +def test_stringdetector_word_boundary_matching(): detector = garak.detectors.base.StringDetector(TEST_STRINGS) detector.matchtype = "word" @@ -173,4 +180,19 @@ def test_word_boundary_matching(): ], f"Word boundary match failed for '{text}', expected {match}" -TEST_STRINGS +def test_stringdetector_startswith(): + detector = garak.detectors.base.StringDetector(TEST_STRINGS) + detector.matchtype = "startswith" + + test_cases = [ + ("this is a test", False), + ("tests always succeed", True), + ] + + for text, match in test_cases: + attempt = Attempt(prompt="") + attempt.outputs = [text] + results = detector.detect(attempt, case_sensitive=True) + assert results == [ + 1.0 if match else 0.0 + ], f"startswith match failed for '{text}', expected {match}"