Skip to content

Commit

Permalink
More BM25 similarity tests
Browse files Browse the repository at this point in the history
  • Loading branch information
softwaredoug committed Jul 20, 2024
1 parent feb43f3 commit 4dbb876
Showing 1 changed file with 16 additions and 0 deletions.
16 changes: 16 additions & 0 deletions test/test_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,22 @@ def arr(x):
"num_docs": 8514,
"expected": 3.8199246
},
"rambo_tmdb": {
"term_freqs": 2, # freq, occurrences of term within document
"doc_freqs": 7, # n, number of documents containing term
"doc_lens": 44, # dl, length of field (approximate)
"avg_doc_len": 50.580456, # avgdl, average length of field
"num_docs": 8514, # N, total number of documents with field
"expected": 4.5636616
},
"the_tmdb": {
"term_freqs": 25, # freq, occurrences of term within document
"doc_freqs": 7823, # n, number of documents containing term
"doc_lens": 152, # dl, length of field (approximate)
"avg_doc_len": 119.18542, # avgdl, average length of field
"num_docs": 8516, # N, total number of documents with field
"expected": 0.08028283
}
}


Expand Down

0 comments on commit 4dbb876

Please sign in to comment.