Skip to content

Commit

Permalink
build trigrams
Browse files Browse the repository at this point in the history
  • Loading branch information
nticaric committed May 18, 2016
1 parent 4473740 commit e613df9
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 20 deletions.
11 changes: 11 additions & 0 deletions src/Indexer/TNTIndexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,17 @@ public function buildDictionary($filename, $count = -1, $hits = true, $docs = fa
file_put_contents($filename, $dictionary, LOCK_EX);
}

/**
 * Build the space-separated list of character trigrams for a keyword.
 *
 * The keyword is wrapped in two underscores on each side, and every window
 * of three consecutive characters is emitted in order, e.g.
 * "mood" => "__m _mo moo ood od_ d__".
 *
 * Windows are measured in UTF-8 characters rather than bytes, so a
 * multibyte letter is never split across two trigrams.
 *
 * @param string $keyword Word to decompose.
 *
 * @return string Trigrams joined by single spaces.
 */
public function buildTrigrams($keyword)
{
    $padded = "__" . $keyword . "__";

    // Measure once, in characters: strlen()/substr() operate on bytes and
    // would cut multibyte characters in half.
    $length = mb_strlen($padded, 'UTF-8');

    $trigrams = [];
    for ($i = 0; $i < $length - 2; $i++) {
        $trigrams[] = mb_substr($padded, $i, 3, 'UTF-8');
    }

    // The padding guarantees at least two windows, and the first and last
    // always begin/end with "_", so joining is equivalent to the former
    // "append a space, then trim" approach.
    return implode(' ', $trigrams);
}

public function info($text)
{
if (!$this->disableOutput) {
Expand Down
65 changes: 45 additions & 20 deletions tests/indexer/IndexerTest.php
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
<?php

use TeamTNT\TNTSearch\TNTSearch;
use TeamTNT\TNTSearch\Indexer\TNTIndexer;
use TeamTNT\TNTSearch\TNTSearch;

class TNTIndexerTest extends PHPUnit_Framework_TestCase
{
{
protected $indexName = "testIndex";
protected $config = [
protected $config = [
'driver' => 'sqlite',
'database' => __DIR__.'/../_files/articles.sqlite',
'database' => __DIR__ . '/../_files/articles.sqlite',
'host' => 'localhost',
'username' => 'testUser',
'password' => 'testPass',
'storage' => __DIR__.'/../_files/'
'storage' => __DIR__ . '/../_files/',
];

public function testSearch()
Expand All @@ -21,14 +21,14 @@ public function testSearch()

$tnt->loadConfig($this->config);

$indexer = $tnt->createIndex($this->indexName);
$indexer = $tnt->createIndex($this->indexName);
$indexer->disableOutput = true;
$indexer->query('SELECT id, title, article FROM articles;');
$indexer->run();

$tnt->selectIndex($this->indexName);
$tnt->asYouType = true;
$res = $tnt->search('Juliet');
$res = $tnt->search('Juliet');
$this->assertEquals([9, 5, 6, 7, 8, 10], $res['ids']);

$res = $tnt->search('Queen Mab');
Expand All @@ -40,24 +40,24 @@ public function testBreakIntoTokens()
$indexer = new TNTIndexer;

$text = "This is some text";
$res = $indexer->breakIntoTokens($text);
$res = $indexer->breakIntoTokens($text);

$this->assertContains("This", $res);
$this->assertContains("text", $res);

$text = "123 123 123";
$res = $indexer->breakIntoTokens($text);
$res = $indexer->breakIntoTokens($text);
$this->assertContains("123", $res);

$text = "Hi! This text contains an [email protected]. Test's email 123.";
$res = $indexer->breakIntoTokens($text);
$res = $indexer->breakIntoTokens($text);
$this->assertContains("test", $res);
$this->assertContains("email", $res);
$this->assertContains("contains", $res);
$this->assertContains("123", $res);

$text = "Superman (1941)";
$res = $indexer->breakIntoTokens($text);
$res = $indexer->breakIntoTokens($text);
$this->assertContains("Superman", $res);
$this->assertContains("1941", $res);
}
Expand All @@ -75,9 +75,9 @@ public function testIfCroatianStemmerIsSet()
$indexer->run();

$this->index = new PDO('sqlite:' . $this->config['storage'] . $this->indexName);
$query = "SELECT * FROM info WHERE key = 'stemmer'";
$docs = $this->index->query($query);
$value = $docs->fetch(PDO::FETCH_ASSOC)['value'];
$query = "SELECT * FROM info WHERE key = 'stemmer'";
$docs = $this->index->query($query);
$value = $docs->fetch(PDO::FETCH_ASSOC)['value'];
$this->assertEquals('croatian', $value);
}

Expand All @@ -94,16 +94,41 @@ public function testIfGermanStemmerIsSet()
$indexer->run();

$this->index = new PDO('sqlite:' . $this->config['storage'] . $this->indexName);
$query = "SELECT * FROM info WHERE key = 'stemmer'";
$docs = $this->index->query($query);
$value = $docs->fetch(PDO::FETCH_ASSOC)['value'];
$query = "SELECT * FROM info WHERE key = 'stemmer'";
$docs = $this->index->query($query);
$value = $docs->fetch(PDO::FETCH_ASSOC)['value'];
$this->assertEquals('german', $value);
}

/**
 * buildTrigrams() should pad the keyword with "__" on both sides and
 * return every three-character window, separated by single spaces.
 */
public function testBuildTrigrams()
{
    $indexer = new TNTIndexer;

    // keyword => expected trigram string (each case listed exactly once)
    $expectations = [
        'created' => '__c _cr cre rea eat ate ted ed_ d__',
        'mood'    => '__m _mo moo ood od_ d__',
        'death'   => '__d _de dea eat ath th_ h__',
        'behind'  => '__b _be beh ehi hin ind nd_ d__',
        'usually' => '__u _us usu sua ual all lly ly_ y__',
    ];

    foreach ($expectations as $keyword => $expected) {
        $this->assertEquals(
            $expected,
            $indexer->buildTrigrams($keyword),
            "Unexpected trigrams for keyword '" . $keyword . "'"
        );
    }
}

/**
 * Delete the file named after $indexName from the _files directory, if it
 * exists, so that it does not persist after a test has run.
 */
public function tearDown()
{
    $indexPath = __DIR__ . '/../_files/' . $this->indexName;

    // Single guarded removal; checking and unlinking a second time is
    // redundant once the file is gone.
    if (file_exists($indexPath)) {
        unlink($indexPath);
    }
}

}
}

0 comments on commit e613df9

Please sign in to comment.