Skip to content

Commit

Permalink
Merge pull request #9 from neocl/dev
Browse files Browse the repository at this point in the history
Release version 0.1a4
  • Loading branch information
letuananh authored Jul 19, 2018
2 parents 8b30d90 + b3b4db8 commit f8bae3e
Show file tree
Hide file tree
Showing 18 changed files with 159 additions and 67 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
include README.rst
include CHANGES.md
include LICENSE
include requirements*.txt
recursive-include jamdict/data/ *.sql
recursive-include jamdict/data/ *.json
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,10 @@ I have mirrored these files to Google Drive so you can download there too:
[https://drive.google.com/drive/folders/1z4zF9ImZlNeTZZplflvvnpZfJp3WVLPk](https://drive.google.com/drive/folders/1z4zF9ImZlNeTZZplflvvnpZfJp3WVLPk)

Official website
- JMdict: [http://edrdg.org/jmdict/edict_doc.html](http://edrdg.org/jmdict/edict_doc.html)
- kanjidic2: [http://www.edrdg.org/kanjidic/kanjd2index.html](http://www.edrdg.org/kanjidic/kanjd2index.html)
- KRADFILE: [http://www.edrdg.org/krad/kradinf.html](http://www.edrdg.org/krad/kradinf.html)

* JMdict: [http://edrdg.org/jmdict/edict_doc.html](http://edrdg.org/jmdict/edict_doc.html)
* kanjidic2: [http://www.edrdg.org/kanjidic/kanjd2index.html](http://www.edrdg.org/kanjidic/kanjd2index.html)
* KRADFILE: [http://www.edrdg.org/krad/kradinf.html](http://www.edrdg.org/krad/kradinf.html)


# Sample code
Expand Down
3 changes: 2 additions & 1 deletion jamdict/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@

########################################################################

from . import __version__ as version_info
from .__version__ import __author__, __email__, __copyright__, __maintainer__
from .__version__ import __credits__, __license__, __description__, __url__
from .__version__ import __version_major__, __version_long__, __version__, __status__
Expand All @@ -54,4 +55,4 @@
from .kanjidic2_sqlite import KanjiDic2SQLite
from .util import Jamdict, JMDictXML, KanjiDic2XML
__all__ = ['Jamdict', 'JMDictSQLite', 'JMDictXML', 'KanjiDic2SQLite', 'KanjiDic2XML',
"__version__", "__author__", "__description__", "__copyright__"]
"__version__", "__author__", "__description__", "__copyright__", "version_info"]
2 changes: 2 additions & 0 deletions jamdict/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from . import tools
tools.main()
2 changes: 1 addition & 1 deletion jamdict/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@
__url__ = "https://github.com/neocl/jamdict"
__maintainer__ = "Le Tuan Anh"
__version_major__ = "0.1"
__version__ = "{}a3".format(__version_major__)
__version__ = "{}a4".format(__version_major__)
__version_long__ = "{} - Alpha".format(__version_major__)
__status__ = "Prototype"
2 changes: 1 addition & 1 deletion jamdict/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
import logging

from chirptext import AppConfig
from chirptext.io import read_file, write_file
from chirptext.chio import read_file, write_file

# ----------------------------------------------------------------------
# Configuration
Expand Down
2 changes: 1 addition & 1 deletion jamdict/data/setup_jmdict.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* Add meta info */
CREATE TABLE IF NOT EXISTS meta (
key TEXT UNIQUE,
key TEXT PRIMARY KEY NOT NULL,
value TEXT NOT NULL
);

Expand Down
6 changes: 3 additions & 3 deletions jamdict/jmdict.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
import logging
from lxml import etree

from chirptext import io as chio
from chirptext import chio

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -82,9 +82,9 @@ def set_info(self, info):
logging.warning("WARNING: multiple info tag")
self.info = info

def text(self, compact=True, separator=' '):
def text(self, compact=True, separator=' ', no_id=False):
tmp = []
if not compact:
if not compact and not no_id:
tmp.append('[id#%s]' % self.idseq)
if self.kana_forms:
tmp.append(self.kana_forms[0].text)
Expand Down
8 changes: 4 additions & 4 deletions jamdict/jmdict_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@

from puchikarui import Schema
from . import __version__ as JAMDICT_VERSION, __url__ as JAMDICT_URL
from .jmdict import JMDEntry, EntryInfo, Link, BibInfo, Audit, KanjiForm, KanaForm, Sense, SenseGloss, LSource
from .jmdict import Meta, JMDEntry, EntryInfo, Link, BibInfo, Audit, KanjiForm, KanaForm, Sense, SenseGloss, LSource


# -------------------------------------------------------------------------------
Expand Down Expand Up @@ -85,7 +85,7 @@ def __init__(self, data_source=":memory:", setup_script=None, setup_file=None, *
self.add_script(SETUP_SCRIPT)
self.add_file(JMDICT_SETUP_FILE)
# Meta
self.add_table('meta', ['jmdict_version', 'jmdict_url', 'generator', 'generator_version', 'generator_url'])
self.add_table('meta', ['key', 'value'], proto=Meta).set_id('key')
self.add_table('Entry', ['idseq'])
self.add_table('Link', ['ID', 'idseq', 'tag', 'desc', 'uri'])
self.add_table('Bib', ['ID', 'idseq', 'tag', 'text'])
Expand Down Expand Up @@ -146,8 +146,8 @@ def search(self, query, ctx=None):
if ctx is None:
with self.ctx() as ctx:
return self.search(query, ctx=ctx)
where = "idseq IN (SELECT idseq FROM Kanji WHERE text like ?) OR idseq IN (SELECT idseq FROM Kana WHERE text like ?)"
params = [query, query]
where = "idseq IN (SELECT idseq FROM Kanji WHERE text like ?) OR idseq IN (SELECT idseq FROM Kana WHERE text like ?) OR idseq IN (SELECT idseq FROM sense JOIN sensegloss ON sense.ID == sensegloss.sid WHERE text like ?)"
params = [query, query, query]
try:
if query.startswith('id#'):
query_int = int(query[3:])
Expand Down
4 changes: 2 additions & 2 deletions jamdict/kanjidic2.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
import logging
from lxml import etree

from chirptext import io as chio
from chirptext import chio


# ------------------------------------------------------------------------------
Expand Down Expand Up @@ -366,7 +366,7 @@ def __init__(self, qc_type='', value='', skip_misclass=""):
- stroke_count - a mistake in the number of strokes
- stroke_and_posn - mistakes in both division and strokes
- stroke_diff - ambiguous stroke counts depending on glyph
--> """
--> """
self.cid = None
self.qc_type = qc_type
self.value = value
Expand Down
62 changes: 38 additions & 24 deletions jamdict/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,13 @@

import os


from chirptext import confirm, TextReport, Timer
from chirptext.cli import CLIApp, setup_logging

from jamdict import Jamdict
from jamdict import config
from jamdict import version_info

# -------------------------------------------------------------------------------
# Configuration
Expand All @@ -53,7 +55,11 @@
JMD_XML = config.get_file('JMDICT_XML')
KD2_XML = config.get_file('KD2_XML')
JMD_DB = config.get_file('JAMDICT_DB')
setup_logging('logging.json', 'logs')

if os.path.isfile('logging.json'):
setup_logging('logging.json', 'logs')
else:
setup_logging(os.path.join(config.home_dir(), 'logging.json'), 'logs')


# -------------------------------------------------------------------------------
Expand Down Expand Up @@ -99,39 +105,41 @@ def import_data(cli, args):
print("Database paths were not provided. Process aborted.")


def dump_result(results):
def dump_result(results, report=None):
if report is None:
report = TextReport()
if results.entries:
print("=" * 40)
print("Found entries")
print("=" * 40)
report.print("=" * 40)
report.print("Found entries")
report.print("=" * 40)
for e in results.entries:
kj = ', '.join([k.text for k in e.kanji_forms])
kn = ', '.join([k.text for k in e.kana_forms])
print("Entry: {} | Kj: {} | Kn: {}".format(e.idseq, kj, kn))
print("-" * 20)
report.print("Entry: {} | Kj: {} | Kn: {}".format(e.idseq, kj, kn))
report.print("-" * 20)
for idx, s in enumerate(e.senses):
print("{idx}. {s}".format(idx=idx + 1, s=s))
print('')
report.print("{idx}. {s}".format(idx=idx + 1, s=s))
report.print('')
else:
print("No dictionary entry was found.")
report.print("No dictionary entry was found.")
if results.chars:
print("=" * 40)
print("Found characters")
print("=" * 40)
report.print("=" * 40)
report.print("Found characters")
report.print("=" * 40)
for c in results.chars:
print("Char: {} | Strokes: {}".format(c, c.stroke_count))
print("-" * 20)
report.print("Char: {} | Strokes: {}".format(c, c.stroke_count))
report.print("-" * 20)
for rmg in c.rm_groups:
print("Readings:", ", ".join([r.value for r in rmg.readings]))
print("Meanings:", ", ".join([m.value for m in rmg.meanings if not m.m_lang or m.m_lang == 'en']))
report.print("Readings:", ", ".join([r.value for r in rmg.readings]))
report.print("Meanings:", ", ".join([m.value for m in rmg.meanings if not m.m_lang or m.m_lang == 'en']))
else:
print("No character was found.")
report.print("No character was found.")


def lookup(cli, args):
'''Lookup words by kanji/kana'''
jam = get_jam(cli, args)
results = jam.lookup(args.query)
results = jam.lookup(args.query, strict_lookup=args.strict)
if args.format == 'json':
print(results.to_json())
else:
Expand All @@ -148,11 +156,15 @@ def file_status(file_path):

def show_info(cli, args):
''' Show jamdict configuration (data folder, configuration file location, etc.) '''
print("Configuration location: {}".format(config._get_config_manager().locate_config()))
print("-" * 40)
print("Jamdict DB location : {} - {}".format(args.jdb, file_status(args.jdb)))
print("JMDict XML file : {} - {}".format(args.jmdxml, file_status(args.jmdxml)))
print("KanjiDic2 XML file : {} - {}".format(args.kd2xml, file_status(args.kd2xml)))
output = TextReport(args.output) if 'output' in args else TextReport()
output.header("Jamdict | {} - Version: {}".format(version_info.__description__, version_info.__version__), level='h0')
output.header("Basic configuration")
output.print("JAMDICT_HOME: {}".format(config.home_dir()))
output.print("Configuration location: {}".format(config._get_config_manager().locate_config()))
output.header("Data files")
output.print("Jamdict DB location: {} - {}".format(args.jdb, file_status(args.jdb)))
output.print("JMDict XML file : {} - {}".format(args.jmdxml, file_status(args.jmdxml)))
output.print("KanjiDic2 XML file : {} - {}".format(args.kd2xml, file_status(args.kd2xml)))


# -------------------------------------------------------------------------------
Expand All @@ -178,13 +190,15 @@ def main():

# show info
info_task = app.add_task('info', func=show_info)
info_task.add_argument('-o', '--output', help='Write information to a text file')
add_data_config(info_task)

# look up task
lookup_task = app.add_task('lookup', func=lookup)
lookup_task.add_argument('query', help='kanji/kana')
lookup_task.add_argument('-f', '--format', help='json or text')
lookup_task.add_argument('--compact', action='store_true')
lookup_task.add_argument('-s', '--strict', action='store_true')
lookup_task.set_defaults(func=lookup)
add_data_config(lookup_task)

Expand Down
29 changes: 17 additions & 12 deletions jamdict/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
import logging
import threading
from collections import defaultdict as dd
from collections import OrderedDict
from chirptext.deko import HIRAGANA, KATAKANA

from . import config
from .jmdict import JMDictXMLParser
Expand All @@ -73,13 +75,13 @@ def __init__(self, entries, chars):
self.entries = entries if entries else []
self.chars = chars if chars else []

def text(self, compact=True, entry_sep='。', separator=' | '):
def text(self, compact=True, entry_sep='。', separator=' | ', no_id=False, with_chars=True):
output = []
if self.entries:
entries_txt = str(entry_sep.join(e.text(compact=compact, separator='') for e in self.entries))
entries_txt = str(entry_sep.join(e.text(compact=compact, separator='', no_id=no_id) for e in self.entries))
output.append("Entries: ")
output.append(entries_txt)
if self.entries:
if self.chars and with_chars:
if compact:
chars_txt = ', '.join(str(c) for c in self.chars)
else:
Expand Down Expand Up @@ -190,9 +192,9 @@ def import_data(self):
getLogger().info("Importing KanjiDic2 data")
self.kd2.insert_chars(self.kd2_xml)

def get_char(self, literal):
def get_char(self, literal, ctx=None):
if self.kd2 is not None:
return self.kd2.get_char(literal)
return self.kd2.get_char(literal, ctx=ctx)
elif self.kd2_xml:
return self.kd2_xml.lookup(literal)
else:
Expand All @@ -206,7 +208,7 @@ def get_entry(self, idseq):
else:
raise LookupError("There is no backend data available")

def lookup(self, query):
def lookup(self, query, strict_lookup=False, lookup_chars=True, ctx=None):
if not self.is_available():
raise LookupError("There is no backend data available")
elif not query:
Expand All @@ -215,18 +217,21 @@ def lookup(self, query):
entries = []
chars = []
if self.jmdict is not None:
entries = self.jmdict.search(query)
entries = self.jmdict.search(query, ctx=ctx)
elif self.jmdict_xml:
entries = self.jmdict_xml.lookup(query)
if self.has_kd2():
if lookup_chars and self.has_kd2():
# lookup each character in query and kanji readings of each found entries
chars_to_search = set(query)
if entries:
chars_to_search = OrderedDict({c: c for c in query})
if not strict_lookup and entries:
# auto add characters from entries
for e in entries:
for k in e.kanji_forms:
chars_to_search.update(k.text)
for c in k.text:
if c not in HIRAGANA and c not in KATAKANA:
chars_to_search[c] = c
for c in chars_to_search:
result = self.get_char(c)
result = self.get_char(c, ctx=ctx)
if result is not None:
chars.append(result)
return LookupResult(entries, chars)
Expand Down
20 changes: 11 additions & 9 deletions jamdol-flask.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,22 +52,22 @@
from flask import Flask, Response
from functools import wraps
from flask import request

from chirptext.cli import setup_logging

from jamdict import Jamdict

# ---------------------------------------------------------------------
# CONFIGURATION
# ---------------------------------------------------------------------

setup_logging('logging.json', 'logs')
app = Flask(__name__, static_url_path="")
# Prefer to use jmdict.en
DB_FILE = os.path.abspath('./data/jamdict.en.db')
if not os.path.isfile(DB_FILE):
DB_FILE = os.path.abspath('./data/jamdict.db')
jmd = Jamdict(db_file=DB_FILE)
jmd = Jamdict()


def get_logger():
logging.getLogger(__name__)
def getLogger():
return logging.getLogger(__name__)


# ---------------------------------------------------------------------
Expand Down Expand Up @@ -100,9 +100,11 @@ def get_entry(idseq):


@app.route('/jamdol/search/<query>', methods=['GET'])
@app.route('/jamdol/search/<strict>/<query>', methods=['GET'])
@jsonp
def search(query):
results = jmd.lookup(query)
def search(query, strict=None):
getLogger().info("Query = {}".format(query))
results = jmd.lookup(query, strict_lookup=strict)
return results.to_json()


Expand Down
Loading

0 comments on commit f8bae3e

Please sign in to comment.