From 104a652fd498e6bd995512089209d186d56bc3e2 Mon Sep 17 00:00:00 2001 From: James Braza Date: Thu, 7 Mar 2024 14:58:54 -0800 Subject: [PATCH] Added Doc.__hash__, with test (#244) --- paperqa/types.py | 3 +++ tests/test_paperqa.py | 16 +++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/paperqa/types.py b/paperqa/types.py index d1ab001a9..ed1384c8f 100644 --- a/paperqa/types.py +++ b/paperqa/types.py @@ -54,6 +54,9 @@ class Doc(Embeddable): citation: str dockey: DocKey + def __hash__(self) -> int: + return hash((self.docname, self.dockey)) + class Text(Embeddable): text: str diff --git a/tests/test_paperqa.py b/tests/test_paperqa.py index dc6733e77..189fc7347 100644 --- a/tests/test_paperqa.py +++ b/tests/test_paperqa.py @@ -641,21 +641,31 @@ async def my_callback(result): assert len(my_results) > 1 -def test_duplicate(): +def test_duplicate() -> None: + """Check Docs doesn't store duplicates, while checking nonduplicate docs are stored.""" docs = Docs() assert docs.add_url( "https://en.wikipedia.org/wiki/Frederick_Bates_(politician)", citation="WikiMedia Foundation, 2023, Accessed now", - dockey="test", + dockey="test1", ) assert ( docs.add_url( "https://en.wikipedia.org/wiki/Frederick_Bates_(politician)", citation="WikiMedia Foundation, 2023, Accessed now", - dockey="test", + dockey="test1", ) is None ) + assert len(docs.docs) == 1, "Should have added only one document" + assert docs.add_url( + "https://en.wikipedia.org/wiki/National_Flag_of_Canada_Day", + citation="WikiMedia Foundation, 2023, Accessed now", + dockey="test2", + ) + assert ( + len(set(docs.docs.values())) == 2 + ), "Unique documents should be hashed as unique" def test_custom_embedding():