diff --git a/README.md b/README.md index bbdb9c4a..df2e501e 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ Full documentation for the library can be found [here](https://yomguithereal.git * [Fibonacci Heap](https://yomguithereal.github.io/mnemonist/fibonacci-heap) * [Heap](https://yomguithereal.github.io/mnemonist/heap) * [Index](https://yomguithereal.github.io/mnemonist/index-structure) +* [Inverted Index](https://yomguithereal.github.io/mnemonist/inverted-index) * [Linked List](https://yomguithereal.github.io/mnemonist/linked-list) * [MultiIndex](https://yomguithereal.github.io/mnemonist/multi-index) * [MultiMap](https://yomguithereal.github.io/mnemonist/multi-map) diff --git a/endpoint.js b/endpoint.js index 80760c7f..0129bf34 100644 --- a/endpoint.js +++ b/endpoint.js @@ -19,6 +19,7 @@ module.exports = { MinHeap: Heap.MinHeap, MaxHeap: Heap.MaxHeap, Index: require('./index.js'), + InvertedIndex: require('./inverted-index.js'), LinkedList: require('./linked-list.js'), MultiIndex: require('./multi-index.js'), MultiMap: require('./multi-map.js'), @@ -27,6 +28,7 @@ module.exports = { Stack: require('./stack.js'), SuffixArray: SuffixArray, GeneralizedSuffixArray: SuffixArray.GeneralizedSuffixArray, + Set: require('./set.js'), SymSpell: require('./symspell.js'), Trie: require('./trie.js'), VPTree: require('./vp-tree.js') diff --git a/inverted-index.js b/inverted-index.js index c19c7abb..0f210b20 100644 --- a/inverted-index.js +++ b/inverted-index.js @@ -7,8 +7,6 @@ var iterateOver = require('./utils/iterate.js'), helpers = require('./set.js'); -var intersect = helpers.intersect; - function identity(x) { return x; } @@ -100,24 +98,27 @@ InvertedIndex.prototype.add = function(doc) { }; /** - * Method used to query the index. + * Method used to query the index in an AND fashion. * * @param {any} query - Query * @return {Set} - Intersection of documents matching the query. 
*/ -InvertedIndex.prototype.get = function(query) { +InvertedIndex.prototype.query = function(query) { // Early termination if (!this.size) - return new Set(); + return []; // First we need to tokenize the query var tokens = this.queryTokenizer(query); if (!Array.isArray(tokens)) - throw new Error('mnemonist/InvertedIndex.get: tokenizer function should return an array of tokens.'); + throw new Error('mnemonist/InvertedIndex.query: tokenizer function should return an array of tokens.'); + + if (!tokens.length) + return []; - var matchingSet, + var matchingSet = new Set(), token, set, i, @@ -131,10 +132,61 @@ InvertedIndex.prototype.get = function(query) { if (!set || !set.size) return new Set(); - if (!matchingSet) - matchingSet = new Set(set); + if (!matchingSet.size) + helpers.add(matchingSet, set); else - intersect(matchingSet, set); + helpers.intersect(matchingSet, set); + } + + var results = new Array(matchingSet.size), + iterator = matchingSet.values(), + step; + + i = 0; + + while ((step = iterator.next(), !step.done)) + results[i++] = this.items[step.value]; + + return results; +}; +InvertedIndex.prototype.andQuery = InvertedIndex.prototype.query; + +/** + * Method used to query the index in an OR fashion. + * + * @param {any} query - Query + * @return {Set} - Union of documents matching the query. 
+ */ +InvertedIndex.prototype.orQuery = function(query) { + + // Early termination + if (!this.size) + return []; + + // First we need to tokenize the query + var tokens = this.queryTokenizer(query); + + if (!Array.isArray(tokens)) + throw new Error('mnemonist/InvertedIndex.orQuery: tokenizer function should return an array of tokens.'); + + if (!tokens.length) + return []; + + var matchingSet = new Set(), + token, + set, + i, + l; + + for (i = 0, l = tokens.length; i < l; i++) { + token = tokens[i]; + set = this.mapping.get(token); + + // Empty set + if (!set || !set.size) + continue; + + helpers.add(matchingSet, set); } var results = new Array(matchingSet.size), diff --git a/package.json b/package.json index 5a6cb67b..efcf2d28 100644 --- a/package.json +++ b/package.json @@ -23,6 +23,7 @@ "fibonacci heap", "stack", "queue", + "inverted index", "linked list", "trie", "bag", diff --git a/test/inverted-index.js b/test/inverted-index.js index 938800cf..ec340399 100644 --- a/test/inverted-index.js +++ b/test/inverted-index.js @@ -73,20 +73,32 @@ describe('InvertedIndex', function() { it('should be possible to query the index.', function() { var index = InvertedIndex.from(DOCS, tokenizer); - var results = index.get('A mouse.'); + var results = index.query('A mouse.'); assert.deepEqual(results, DOCS); - results = index.get('cheese'); + results = index.query('cheese'); assert.deepEqual(results, DOCS.slice(1)); - results = index.get('The cat'); + results = index.query('The cat'); assert.deepEqual(results, [DOCS[0]]); - results = index.get('The cat likes'); + results = index.query('The cat likes'); assert.deepEqual(results, []); - results = index.get('really something'); + results = index.query('really something'); assert.deepEqual(results, DOCS.slice(-1)); + + assert.deepEqual(index.query('really'), index.andQuery('really')); + }); + + it('should be possible to query union.', function() { + var index = InvertedIndex.from(DOCS, tokenizer); + + var results = 
index.orQuery('something'); + assert.deepEqual(results, DOCS.slice(-1)); + + results = index.orQuery('The cat is really'); + assert.deepEqual(results, [DOCS[0], DOCS[2]]); }); it('should be possible to iterate using #.forEach', function() {