lotus-data · dhruviyer · Jan 10, 2025 · Jan 10, 2025 · Jan 10, 2025 · Jan 10, 2025
diff --git a/.github/tests/lm_tests.py b/.github/tests/lm_tests.py
@@ -210,6 +210,108 @@ def test_sem_extract(setup_models, model):
         ), f"Number of Championships '{row['Number of Championships']}' not found in '{row['Number of Championships_quote']}'"
 
 
+################################################################################
+# CoT tests
+################################################################################
+@pytest.mark.parametrize("model", get_enabled("gpt-4o-mini", "ollama/llama3.1"))
+def test_filter_operation_cot(setup_models, model):
+    """CoT filtering should keep only the statements that leave the speaker holding an apple."""
+    lotus.settings.configure(lm=setup_models[model])
+
+    # The first two statements end with at least one apple; the last two end with none.
+    statements = [
+        "I had two apples, then I gave away one",
+        "My friend gave me an apple",
+        "I gave away both of my apples",
+        "I gave away my apple, then a friend gave me his apple, then I threw my apple away",
+    ]
+    df = pd.DataFrame({"Text": statements})
+
+    filtered_df = df.sem_filter("{Text} I have at least one apple", strategy="cot")
+
+    # Expected result: the first two statements, under the default 0..1 index.
+    expected_df = pd.DataFrame({"Text": statements[:2]})
+    assert filtered_df.equals(expected_df)
+
+
+@pytest.mark.parametrize("model", get_enabled("gpt-4o-mini", "ollama/llama3.1"))
+def test_filter_operation_cot_fewshot(setup_models, model):
+    """CoT filtering with few-shot examples that carry both an answer and its reasoning."""
+    lotus.settings.configure(lm=setup_models[model])
+
+    # "Five, Four, Three" counts down; the other two rows are the ones expected to pass.
+    df = pd.DataFrame(
+        {
+            "Sequence": [
+                "Five, Four, Three",
+                "A, B, C",
+                "Pond, Lake, Ocean",
+            ]
+        }
+    )
+
+    # Few-shot demonstrations: every example is a positive case paired with a rationale.
+    examples_df = pd.DataFrame(
+        {
+            "Sequence": ["1, 2, 3", "penny, nickel, dime, quarter", "villiage, town, city"],
+            "Answer": [True, True, True],
+            "Reasoning": [
+                "1, 2, 3 is an increasing sequence of numbers",
+                "penny, nickel, dime, quarter is an increasing sequence of coins",
+                "villiage, town, city is an increasing sequence of settlements",
+            ],
+        }
+    )
+
+    filtered_df = df.sem_filter(
+        "{Sequence} is increasing",
+        strategy="cot",
+        examples=examples_df,
+        additional_cot_instructions="Assume the most typical or logical case.",
+    )
+
+    # The expected rows are compared under index labels 1 and 2, not a fresh 0..1 index.
+    expected_df = pd.DataFrame(
+        {"Sequence": ["A, B, C", "Pond, Lake, Ocean"]},
+        index=[1, 2],
+    )
+    assert filtered_df.equals(expected_df)
+
+
+@pytest.mark.parametrize("model", get_enabled("gpt-4o-mini", "ollama/llama3.1"))
+def test_filter_operation_cot_fewshot_no_reasoning(setup_models, model):
+    """CoT filtering with few-shot examples that provide answers but no reasoning column."""
+    lotus.settings.configure(lm=setup_models[model])
+
+    # "Five, Four, Three" counts down; the other two rows are the ones expected to pass.
+    df = pd.DataFrame(
+        {
+            "Sequence": [
+                "Five, Four, Three",
+                "A, B, C",
+                "Pond, Lake, Ocean",
+            ]
+        }
+    )
+
+    # Few-shot demonstrations: answers only, deliberately without a "Reasoning" column.
+    examples_df = pd.DataFrame(
+        {
+            "Sequence": ["1, 2, 3", "penny, nickel, dime, quarter", "villiage, town, city"],
+            "Answer": [True, True, True],
+        }
+    )
+
+    filtered_df = df.sem_filter("{Sequence} is increasing", strategy="cot", examples=examples_df)
+
+    # The expected rows are compared under index labels 1 and 2, not a fresh 0..1 index.
+    expected_df = pd.DataFrame(
+        {"Sequence": ["A, B, C", "Pond, Lake, Ocean"]},
+        index=[1, 2],
+    )
+    assert filtered_df.equals(expected_df)
+
+
 ################################################################################
 # Cascade tests
 ################################################################################

diff --git a/.github/tests/multimodality_tests.py b/.github/tests/multimodality_tests.py
@@ -138,7 +138,8 @@ def test_topk_operation(setup_models, model):
 
         top_2_actual = set(sorted_df["image"].values)
         assert top_2_expected == top_2_actual
-
+
+
 @pytest.mark.parametrize("model", get_enabled("gpt-4o-mini"))
 def test_topk_with_groupby_operation(setup_models, model):
     image_url = [
@@ -153,8 +154,7 @@ def test_topk_with_groupby_operation(setup_models, model):
 
     df = image_df.join(element_df, how="cross")
     df.sem_topk("the {image} is most likely an {element}", K=1, group_by=["element"])
-    assert(len(set(df["element"])) == 2)
-
+    assert len(set(df["element"])) == 2
 
 
 @pytest.mark.parametrize("model", get_enabled("clip-ViT-B-32"))

diff --git a/examples/op_examples/filter.py b/examples/op_examples/filter.py
@@ -6,6 +6,7 @@
 lm = LM(model="gpt-4o-mini")
 
 lotus.settings.configure(lm=lm)
+
 data = {
     "Course Name": [
         "Probability and Random Processes",
@@ -16,5 +17,5 @@
 }
 df = pd.DataFrame(data)
 user_instruction = "{Course Name} requires a lot of math"
-df = df.sem_filter(user_instruction)
+df = df.sem_filter(user_instruction, strategy="cot")
 print(df)
diff --git a/examples/op_examples/filter_cot.py b/examples/op_examples/filter_cot.py
@@ -0,0 +1,28 @@
+import pandas as pd
+
+import lotus
+from lotus.models import LM
+
+lm = LM(model="gpt-4o-mini")
+
+lotus.settings.configure(lm=lm)
+
+# Chain-of-thought filter example. Each row tells a short story about apples
+# being received and given away; the filter has to work out whether the
+# speaker still has at least one apple at the end of the story.
+data = {
+    "Text": [
+        "I had two apples, then I gave away one",
+        "My friend gave me an apple",
+        "I gave away both of my apples",
+        "I gave away my apple, then a friend gave me his apple, then I threw my apple away",
+    ]
+}
+df = pd.DataFrame(data)
+user_instruction = "{Text} I have at least one apple"
+
+filtered_df = df.sem_filter(user_instruction, strategy="cot", return_all=True)
+# To see the reasoning chains as well, run this call instead of the one above:
+# filtered_df = df.sem_filter(user_instruction, strategy="cot", return_all=True, return_explanations=True)
+
+print(filtered_df)
diff --git a/examples/op_examples/multimodal_ops/filter.py b/examples/op_examples/multimodal_ops/filter.py
@@ -15,9 +15,7 @@
 labels = [os.path.splitext(image)[0] for image in image_file_names]
 image_paths = [os.path.join("images", image) for image in image_file_names]
 
-df = pd.DataFrame({"image": ImageArray(image_paths), 
-                   "label": labels, 
-                   "image_path": image_paths})
+df = pd.DataFrame({"image": ImageArray(image_paths), "label": labels, "image_path": image_paths})
 
 df = df.sem_filter("{image} represents number 1")
 print(df)
diff --git a/lotus/sem_ops/postprocessors.py b/lotus/sem_ops/postprocessors.py
@@ -8,6 +8,26 @@
 )
 
 
+def cot_postprocessor(llm_answers: list[str]):
+    """
+    Return (outputs, explanations): the answer text and reasoning text of each LLM response.
+
+    The answer is everything after the first "Answer:" marker; the reasoning is the text
+    between an optional "Reasoning:" header line and that marker. A response with no
+    "Answer:" marker is kept whole as the answer, with None as its reasoning.
+    """
+    outputs: list[str | None] = []
+    explanations: list[str | None] = []
+    for llm_answer in llm_answers:
+        # partition() reports a missing marker as an empty separator, so a bare
+        # "True"/"False" reply is no longer sliced away by a find() result of -1.
+        before, marker, after = llm_answer.partition("Answer:")
+        _, header, body = before.partition("Reasoning:\n")
+        outputs.append(after if marker else llm_answer)
+        explanations.append((body if header else before).strip("\n") if marker else None)
+    return outputs, explanations
+
+
 def map_postprocess_cot(llm_answers: list[str]) -> SemanticMapPostprocessOutput:
     """
     Postprocess the output of the map operator with CoT reasoning.
@@ -80,48 +100,9 @@ def extract_postprocess(llm_answers: list[str]) -> SemanticExtractPostprocessOut
     return SemanticExtractPostprocessOutput(raw_outputs=llm_answers, outputs=extract_data)
 
 
-def filter_postprocess_cot(llm_answers: list[str], default: bool) -> SemanticFilterPostprocessOutput:
-    """
-    Postprocess the output of the filter operator with CoT reasoning.
-
-    Args:
-        llm_answers (list[str]): The list of llm answers.
-        default (bool): The default value to use if we fail to parse the answer.
-
-    Returns:
-        SemanticFilterPostprocessOutput
-    """
-    outputs: list[bool] = []
-    explanations: list[str | None] = []
-
-    for llm_answer in llm_answers:
-        reasoning_idx = llm_answer.find("Reasoning:\n")
-        if reasoning_idx == -1:
-            reasoning_idx = 0
-        else:
-            reasoning_idx += len("Reasoning:\n")
-
-        answer_idx = llm_answer.find("Answer:")
-        reasoning = llm_answer[reasoning_idx:answer_idx].rstrip("\n").lstrip("\n")
-        answer = llm_answer[answer_idx + len("Answer:") :]
-
-        explanations.append(reasoning)
-
-        if "True" in answer:
-            outputs.append(True)
-        elif "False" in answer:
-            outputs.append(False)
-        else:
-            lotus.logger.info(f"\t Failed to parse: defaulting to {default}")
-            outputs.append(default)
-
-    return SemanticFilterPostprocessOutput(raw_outputs=llm_answers, outputs=outputs, explanations=explanations)
-
-
 def filter_postprocess(
     llm_answers: list[str],
     default: bool = True,
-    cot_reasoning: bool = False,
 ) -> SemanticFilterPostprocessOutput:
     """
     Postprocess the output of the filter operator.
@@ -134,18 +115,21 @@ def filter_postprocess(
     Returns:
         SemanticFilterPostprocessOutput
     """
-    if cot_reasoning:
-        return filter_postprocess_cot(llm_answers, default)
+    outputs, explanations = cot_postprocessor(llm_answers)
+
+    def process_outputs(answer):
+        if answer is None:
+            lotus.logger.info(f"\t Failed to parse {answer}: defaulting to {default}")
+            return default
 
-    outputs: list[bool] = []
-    explanations: list[str | None] = [None] * len(llm_answers)
-    for answer in llm_answers:
         if "True" in answer:
-            outputs.append(True)
+            return True
         elif "False" in answer:
-            outputs.append(False)
+            return False
         else:
-            lotus.logger.info(f"\t Failed to parse: defaulting to {default}")
-            outputs.append(default)
+            lotus.logger.info(f"\t Failed to parse {answer}: defaulting to {default}")
+            return default
+
+    outputs = [process_outputs(answer) for answer in outputs]
 
     return SemanticFilterPostprocessOutput(raw_outputs=llm_answers, outputs=outputs, explanations=explanations)