Skip to content

Commit

Permalink
Adding inverted index
Browse files Browse the repository at this point in the history
  • Loading branch information
Yomguithereal committed Apr 14, 2017
1 parent 6fe0e33 commit a56c05f
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 15 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Full documentation for the library can be found [here](https://yomguithereal.git
* [Fibonacci Heap](https://yomguithereal.github.io/mnemonist/fibonacci-heap)
* [Heap](https://yomguithereal.github.io/mnemonist/heap)
* [Index](https://yomguithereal.github.io/mnemonist/index-structure)
* [Inverted Index](https://yomguithereal.github.io/mnemonist/inverted-index)
* [Linked List](https://yomguithereal.github.io/mnemonist/linked-list)
* [MultiIndex](https://yomguithereal.github.io/mnemonist/multi-index)
* [MultiMap](https://yomguithereal.github.io/mnemonist/multi-map)
Expand Down
2 changes: 2 additions & 0 deletions endpoint.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ module.exports = {
MinHeap: Heap.MinHeap,
MaxHeap: Heap.MaxHeap,
Index: require('./index.js'),
InvertedIndex: require('./inverted-index.js'),
LinkedList: require('./linked-list.js'),
MultiIndex: require('./multi-index.js'),
MultiMap: require('./multi-map.js'),
Expand All @@ -27,6 +28,7 @@ module.exports = {
Stack: require('./stack.js'),
SuffixArray: SuffixArray,
GeneralizedSuffixArray: SuffixArray.GeneralizedSuffixArray,
Set: require('./set.js'),
SymSpell: require('./symspell.js'),
Trie: require('./trie.js'),
VPTree: require('./vp-tree.js')
Expand Down
72 changes: 62 additions & 10 deletions inverted-index.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
var iterateOver = require('./utils/iterate.js'),
helpers = require('./set.js');

var intersect = helpers.intersect;

function identity(x) {
return x;
}
Expand Down Expand Up @@ -100,24 +98,27 @@ InvertedIndex.prototype.add = function(doc) {
};

/**
* Method used to query the index.
* Method used to query the index in an AND fashion.
*
* @param {any} query - Query
* @return {Array} - Array of documents matching every token of the query (intersection).
*/
InvertedIndex.prototype.get = function(query) {
InvertedIndex.prototype.query = function(query) {

// Early termination
if (!this.size)
return new Set();
return [];

// First we need to tokenize the query
var tokens = this.queryTokenizer(query);

if (!Array.isArray(tokens))
throw new Error('mnemonist/InvertedIndex.get: tokenizer function should return an array of tokens.');
throw new Error('mnemonist/InvertedIndex.query: tokenizer function should return an array of tokens.');

if (!tokens.length)
return [];

var matchingSet,
var matchingSet = new Set(),
token,
set,
i,
Expand All @@ -131,10 +132,61 @@ InvertedIndex.prototype.get = function(query) {
if (!set || !set.size)
return new Set();

if (!matchingSet)
matchingSet = new Set(set);
if (!matchingSet.size)
helpers.add(matchingSet, set);
else
intersect(matchingSet, set);
helpers.intersect(matchingSet, set);
}

var results = new Array(matchingSet.size),
iterator = matchingSet.values(),
step;

i = 0;

while ((step = iterator.next(), !step.done))
results[i++] = this.items[step.value];

return results;
};
InvertedIndex.prototype.andQuery = InvertedIndex.prototype.query;

/**
* Method used to query the index in an OR fashion.
*
* @param {any} query - Query
* @return {Array} - Array of documents matching at least one token of the query (union).
*/
InvertedIndex.prototype.orQuery = function(query) {

// Early termination
if (!this.size)
return [];

// First we need to tokenize the query
var tokens = this.queryTokenizer(query);

if (!Array.isArray(tokens))
throw new Error('mnemonist/InvertedIndex.orQuery: tokenizer function should return an array of tokens.');

if (!tokens.length)
return [];

var matchingSet = new Set(),
token,
set,
i,
l;

for (i = 0, l = tokens.length; i < l; i++) {
token = tokens[i];
set = this.mapping.get(token);

// Empty set
if (!set || !set.size)
continue;

helpers.add(matchingSet, set);
}

var results = new Array(matchingSet.size),
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"fibonacci heap",
"stack",
"queue",
"inverted index",
"linked list",
"trie",
"bag",
Expand Down
22 changes: 17 additions & 5 deletions test/inverted-index.js
Original file line number Diff line number Diff line change
Expand Up @@ -73,20 +73,32 @@ describe('InvertedIndex', function() {
it('should be possible to query the index.', function() {
var index = InvertedIndex.from(DOCS, tokenizer);

var results = index.get('A mouse.');
var results = index.query('A mouse.');
assert.deepEqual(results, DOCS);

results = index.get('cheese');
results = index.query('cheese');
assert.deepEqual(results, DOCS.slice(1));

results = index.get('The cat');
results = index.query('The cat');
assert.deepEqual(results, [DOCS[0]]);

results = index.get('The cat likes');
results = index.query('The cat likes');
assert.deepEqual(results, []);

results = index.get('really something');
results = index.query('really something');
assert.deepEqual(results, DOCS.slice(-1));

assert.deepEqual(index.query('really'), index.andQuery('really'));
});

// Checks #.orQuery, which is documented as querying the index in an OR fashion.
it('should be possible to query union.', function() {
  var index = InvertedIndex.from(DOCS, tokenizer);

  // Single-token query: only the last document is expected back.
  var lastDocOnly = DOCS.slice(-1);
  assert.deepEqual(index.orQuery('something'), lastDocOnly);

  // Multi-token query: the first and third documents are expected, in that order.
  var firstAndThird = [DOCS[0], DOCS[2]];
  assert.deepEqual(index.orQuery('The cat is really'), firstAndThird);
});

it('should be possible to iterate using #.forEach', function() {
Expand Down

0 comments on commit a56c05f

Please sign in to comment.