-
Notifications
You must be signed in to change notification settings - Fork 0
/
LuceneSqlServer.py
76 lines (68 loc) · 2.5 KB
/
LuceneSqlServer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import pypyodbc
import lucene
import sys
from java.io import File
from org.apache.lucene.analysis.miscellaneous import LimitTokenCountAnalyzer
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.document import Document, Field, FieldType
from org.apache.lucene.index import FieldInfo, IndexWriter, IndexWriterConfig
from org.apache.lucene.store import SimpleFSDirectory
from org.apache.lucene.util import Version
from org.apache.lucene.search import IndexSearcher
from org.apache.lucene.index import DirectoryReader
from org.apache.lucene.queryparser.classic import QueryParser
class IndexSql(object):
    """Usage: python CreateIndex or python SearchQuery"""
    # NOTE: the class docstring above is printed verbatim as the command-line
    # usage message by the script entry point, so it is kept to a single line.
    #
    # The class copies the rows of a SQL Server table into a Lucene index
    # (CreateIndex) and runs interactive queries against that index
    # (SearchQuery). Constructing an instance starts the Lucene JVM.

    # Filesystem location of the Lucene index, shared by indexing and search.
    # Raw string so the backslashes can never be read as escape sequences.
    INDEX_PATH = r'D:\Code\PythonLearning\Python\idx'

    def __init__(self):
        """Start the Lucene JVM and print the Lucene version in use."""
        lucene.initVM()
        print("lucene version is: %s" % lucene.VERSION)

    @staticmethod
    def _rows_to_texts(rows):
        """Return the first column of every row, skipping NULL (None) cells.

        ``rows`` is any iterable of sequences, such as the tuples returned by
        a DB-API cursor. NULL cells are dropped because there is no text to
        index for them.
        """
        return [row[0] for row in rows if row[0] is not None]

    def GetPoems(self):
        """Fetch the ``text`` column of every row and return it as a list.

        The cursor and the connection are closed even when the query fails.
        """
        connection = pypyodbc.connect('Driver={SQL Server};Server=***;Database=***;uid=***;pwd=***;')
        try:
            cursor = connection.cursor()
            try:
                cursor.execute("SELECT text FROM table")
                # Use the column value itself, not str(row): the latter is the
                # repr of the whole row tuple, quotes and escapes included.
                return self._rows_to_texts(cursor.fetchall())
            finally:
                cursor.close()
        finally:
            connection.close()

    def CreateIndex(self):
        """Rebuild the index at INDEX_PATH with one document per fetched text.

        Each text is stored and analyzed in a field named "content". The
        writer is opened in CREATE mode, so any existing index is replaced.
        """
        # Read from the database first so that a database failure happens
        # before the index writer is opened.
        texts = self.GetPoems()

        analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
        store = SimpleFSDirectory(File(self.INDEX_PATH))
        config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        writer = IndexWriter(store, config)
        try:
            for text in texts:
                # A fresh Document per row; reusing one Document would make
                # every later entry also carry all earlier rows' fields.
                doc = Document()
                doc.add(Field("content", text, Field.Store.YES, Field.Index.ANALYZED))
                writer.addDocument(doc)
            writer.commit()
        except Exception:
            # Discard the partial work and release the writer, then re-raise.
            writer.rollback()
            raise
        else:
            writer.close()

    def SearchQuery(self):
        """Prompt for a query and print the "content" of up to 50 hits.

        The query is read from standard input and parsed against the
        "content" field of the index at INDEX_PATH.
        """
        analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
        store = SimpleFSDirectory(File(self.INDEX_PATH))
        reader = DirectoryReader.open(store)
        try:
            searcher = IndexSearcher(reader)
            parser = QueryParser(Version.LUCENE_CURRENT, "content", analyzer)
            query = parser.parse(raw_input("Query:"))
            for scoreDoc in searcher.search(query, 50).scoreDocs:
                doc = searcher.doc(scoreDoc.doc)
                print("content: %s" % doc.get("content"))
        finally:
            # Close the reader explicitly rather than relying on `del`.
            reader.close()
if __name__ == '__main__':
    # Script entry point: the first command-line argument selects the action.
    # A missing or unrecognised command prints the usage text (the IndexSql
    # class docstring) and exits with status 1 before the Lucene JVM starts.
    if len(sys.argv) < 2 or sys.argv[1] not in ('CreateIndex', 'SearchQuery'):
        print(IndexSql.__doc__)
        sys.exit(1)
    index = IndexSql()
    try:
        if sys.argv[1] == 'CreateIndex':
            index.CreateIndex()
        else:
            index.SearchQuery()
    except Exception as e:
        print("Failed:  %s" % (e,))
        # Bare raise keeps the original traceback; `raise e` would reset it
        # under Python 2.
        raise