From 685a2a161138bebd56a05deaddfced443a954043 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Wed, 15 Jan 2025 08:47:37 +0100 Subject: [PATCH 1/3] rm weird spurious line (??) --- tests/detectors/test_detectors_base.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/detectors/test_detectors_base.py b/tests/detectors/test_detectors_base.py index 990ca79a..7456d48f 100644 --- a/tests/detectors/test_detectors_base.py +++ b/tests/detectors/test_detectors_base.py @@ -171,6 +171,3 @@ def test_word_boundary_matching(): assert results == [ 1.0 if match else 0.0 ], f"Word boundary match failed for '{text}', expected {match}" - - -TEST_STRINGS From ae474235c3436b2548d419d24c87bf81109107d9 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Wed, 15 Jan 2025 08:52:44 +0100 Subject: [PATCH 2/3] add startswith match type for StringDetector --- garak/detectors/base.py | 3 +++ tests/detectors/test_detectors_base.py | 27 +++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/garak/detectors/base.py b/garak/detectors/base.py index 918facf6..7d902891 100644 --- a/garak/detectors/base.py +++ b/garak/detectors/base.py @@ -191,6 +191,9 @@ def detect( elif self.matchtype == "word": if re.search(r"\b" + s + r"\b", output): match = True + elif self.matchtype == "startswith": + if output.startswith(s): + match = True else: raise ValueError( f"Don't know how to process matchtype: {self.matchtype}" diff --git a/tests/detectors/test_detectors_base.py b/tests/detectors/test_detectors_base.py index 7456d48f..879eef2e 100644 --- a/tests/detectors/test_detectors_base.py +++ b/tests/detectors/test_detectors_base.py @@ -133,6 +133,13 @@ def test_matchtype_with_case_sensitivity(matchtype): False, ), # shouldn't match case sensitive ], + "startswith": [ + (f"{lowercase_test_word} start", True), # should match + ( + f"{lowercase_test_word.upper()} start", + False, + ), # shouldn't match case sensitive + ], } for text, should_match_sensitive in test_cases[matchtype]: @@ -152,7 +159,7 @@ def test_matchtype_with_case_sensitivity(matchtype): ], f"Case insensitive {matchtype} match failed for '{text}'" -def test_word_boundary_matching(): +def test_stringdetector_word_boundary_matching(): detector = garak.detectors.base.StringDetector(TEST_STRINGS) detector.matchtype = "word" @@ -171,3 +178,21 @@ def test_word_boundary_matching(): assert results == [ 1.0 if match else 0.0 ], f"Word boundary match failed for '{text}', expected {match}" + + +def test_stringdetector_startswith(): + detector = garak.detectors.base.StringDetector(TEST_STRINGS) + detector.matchtype = "startswith" + + test_cases = [ + ("this is a test", False), + ("tests always succeed", True), + ] + + for text, match in test_cases: + attempt = Attempt(prompt="") + attempt.outputs = [text] + results = detector.detect(attempt, case_sensitive=True) + assert results == [ + 1.0 if match else 0.0 + ], f"startswith match failed for '{text}', expected {match}" From 31db52daa9b8f1ea541ecc7e2693ddad3d7becdf Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Thu, 16 Jan 2025 16:44:10 +0100 Subject: [PATCH 3/3] factor some test cases up and out, all matchtypes are iterated through, use normal lower-case test term --- tests/detectors/test_detectors_base.py | 56 +++++++++++++------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/tests/detectors/test_detectors_base.py b/tests/detectors/test_detectors_base.py index 879eef2e..b0578e33 100644 --- a/tests/detectors/test_detectors_base.py +++ b/tests/detectors/test_detectors_base.py @@ -110,39 +110,39 @@ def test_none_outputs(string_detector): assert results == [1.0], "Failed to handle None outputs correctly" -@pytest.mark.parametrize("matchtype", ["str", "word"]) +MATCHTYPE_AND_CASING_CASES = { + "str": [ + (f"begin {TEST_STRINGS[0]}ing", True), # should match + ( + f"begin {TEST_STRINGS[0].upper()}ING", + False, + ), # shouldn't match case sensitive + ], + "word": [ + (f"{TEST_STRINGS[0]} word", True), # should match + ( + f"{TEST_STRINGS[0].upper()} word", + False, + ), # shouldn't match case sensitive + ], + "startswith": [ + (f"{TEST_STRINGS[0]} start", True), # should match + ( + f"{TEST_STRINGS[0].upper()} start", + False, + ), # shouldn't match case sensitive + ], +} + + +@pytest.mark.parametrize("matchtype", MATCHTYPE_AND_CASING_CASES.keys()) def test_matchtype_with_case_sensitivity(matchtype): """Test case sensitivity with different matchtypes""" - lowercase_test_word = "test" - detector = garak.detectors.base.StringDetector([lowercase_test_word]) + detector = garak.detectors.base.StringDetector([TEST_STRINGS[0]]) detector.matchtype = matchtype - test_cases = { - "str": [ - (f"begin {lowercase_test_word}ing", True), # should match - ( - f"begin {lowercase_test_word.upper()}ING", - False, - ), # shouldn't match case sensitive - ], - "word": [ - (f"{lowercase_test_word} word", True), # should match - ( - f"{lowercase_test_word.upper()} word", - False, - ), # shouldn't match case sensitive - ], - "startswith": [ - (f"{lowercase_test_word} start", True), # should match - ( - f"{lowercase_test_word.upper()} start", - False, - ), # shouldn't match case sensitive - ], - } - - for text, should_match_sensitive in test_cases[matchtype]: + for text, should_match_sensitive in MATCHTYPE_AND_CASING_CASES[matchtype]: attempt = Attempt(prompt="Hello") attempt.outputs = [text]