From 1d158dc65f8a266c62c1a366647d42bf4a923f99 Mon Sep 17 00:00:00 2001 From: Le Tuan Anh Date: Mon, 29 Jun 2020 19:42:25 +0800 Subject: [PATCH 01/11] Only create db ctx instance when needed --- jamdict/util.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/jamdict/util.py b/jamdict/util.py index bcb7468..54c12e2 100644 --- a/jamdict/util.py +++ b/jamdict/util.py @@ -158,12 +158,7 @@ def __init__(self, db_file=None, kd2_file=None, self._jmne_xml = None self.__krad_map = None self.reuse_ctx = reuse_ctx - self.__jm_ctx = None - try: - if self.reuse_ctx and self.db_file and os.path.isfile(self.db_file): - self.__jm_ctx = self.jmdict.ctx() - except Exception: - getLogger().warning("JMdict data could not be accessed.") + self.__jm_ctx = None # for reusing database context def __del__(self): if self.__jm_ctx is not None: @@ -173,6 +168,17 @@ def __del__(self): except Exception: pass + def __make_db_ctx(self): + ''' Try to reuse context if allowed ''' + try: + if not self.reuse_ctx: + return self.jmdict.ctx() + elif self.__jm_ctx is None and self.db_file and os.path.isfile(self.db_file): + self.__jm_ctx = self.jmdict.ctx() + except Exception: + getLogger().warning("JMdict data could not be accessed.") + return self.__jm_ctx + @property def db_file(self): return self.__db_file @@ -352,8 +358,7 @@ def lookup(self, query, strict_lookup=False, lookup_chars=True, ctx=None, lookup raise LookupError("There is no backend data available") elif not query: raise ValueError("Query cannot be empty") - if ctx is None and self.reuse_ctx and self.__jm_ctx is not None: - ctx = self.__jm_ctx + ctx = self.__make_db_ctx() # Lookup words entries = [] chars = [] From b27fc9a75af6dc45f31a72c742f78abf7f6c6911 Mon Sep 17 00:00:00 2001 From: Le Tuan Anh Date: Wed, 1 Jul 2020 18:45:36 +0800 Subject: [PATCH 02/11] use __make_db_ctx() to reuse db ctx --- jamdict/util.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/jamdict/util.py 
b/jamdict/util.py index 54c12e2..b056dec 100644 --- a/jamdict/util.py +++ b/jamdict/util.py @@ -321,14 +321,18 @@ def import_data(self): def get_ne(self, idseq, ctx=None): ''' Get name entity by idseq in JMnedict ''' if self.jmnedict is not None: + if ctx is None: + ctx = self.__make_db_ctx() return self.jmnedict.get_ne(idseq, ctx=ctx) elif self.jmnedict_xml_file: return self.jmne_xml.lookup(idseq) else: - raise LookupError("There is no KanjiDic2 data source available") + raise LookupError("There is no JMnedict data source available") def get_char(self, literal, ctx=None): if self.kd2 is not None: + if ctx is None: + ctx = self.__make_db_ctx() return self.kd2.get_char(literal, ctx=ctx) elif self.kd2_xml: return self.kd2_xml.lookup(literal) From 1f1b193059aad5a0fb7bc36b7d93a5b3332834a9 Mon Sep 17 00:00:00 2001 From: Le Tuan Anh Date: Thu, 15 Apr 2021 22:48:41 +0800 Subject: [PATCH 03/11] expose KRad by default --- jamdict/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/jamdict/__init__.py b/jamdict/__init__.py index aed1b60..5c4694f 100644 --- a/jamdict/__init__.py +++ b/jamdict/__init__.py @@ -54,5 +54,6 @@ from .jmdict_sqlite import JMDictSQLite from .kanjidic2_sqlite import KanjiDic2SQLite from .util import Jamdict, JMDictXML, KanjiDic2XML -__all__ = ['Jamdict', 'JMDictSQLite', 'JMDictXML', 'KanjiDic2SQLite', 'KanjiDic2XML', +from .krad import KRad +__all__ = ['Jamdict', 'JMDictSQLite', 'JMDictXML', 'KanjiDic2SQLite', 'KanjiDic2XML', 'KRad', "__version__", "__author__", "__description__", "__copyright__", "version_info"] From b6cb3d0b9f393d0bcbe417a5be0f0a07d5d36b36 Mon Sep 17 00:00:00 2001 From: Le Tuan Anh Date: Thu, 15 Apr 2021 22:48:54 +0800 Subject: [PATCH 04/11] make config file optional --- jamdict/config.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/jamdict/config.py b/jamdict/config.py index 90a7c0a..3f3c79c 100644 --- a/jamdict/config.py +++ b/jamdict/config.py @@ -58,17 
+58,25 @@ def _get_config_manager(): return __app_config +def _ensure_config(): + # need to create a config + config_dir = os.path.expanduser('~/.jamdict/') + if not os.path.exists(config_dir): + os.makedirs(config_dir) + cfg_loc = os.path.join(config_dir, 'config.json') + default_config = read_file(CONFIG_TEMPLATE) + getLogger().warning("Jamdict configuration file could not be found. A new configuration file will be generated at {}".format(cfg_loc)) + getLogger().debug("Default config: {}".format(default_config)) + write_file(cfg_loc, default_config) + + def read_config(): if not __app_config.config and not __app_config.locate_config(): - # need to create a config - config_dir = os.path.expanduser('~/.jamdict/') - if not os.path.exists(config_dir): - os.makedirs(config_dir) - cfg_loc = os.path.join(config_dir, 'config.json') - default_config = read_file(CONFIG_TEMPLATE) - getLogger().warning("Jamdict configuration file could not be found. A new configuration file will be generated at {}".format(cfg_loc)) - getLogger().debug("Default config: {}".format(default_config)) - write_file(cfg_loc, default_config) + # _ensure_config() + # [2021-04-15] data can be installed via PyPI + # configuration file can be optional now + # load config from default template + __app_config.load(CONFIG_TEMPLATE) # read config config = __app_config.config return config From 96b9915ed27802b9d19ae0abc471a451802e5b70 Mon Sep 17 00:00:00 2001 From: Le Tuan Anh Date: Thu, 15 Apr 2021 22:49:45 +0800 Subject: [PATCH 05/11] document code --- jamdict/jmdict.py | 79 +++++++++------------ jamdict/kanjidic2.py | 78 ++++++++------------ jamdict/krad.py | 60 +++++----------- jamdict/util.py | 165 +++++++++++++++++++++++++++++++++++++------ 4 files changed, 226 insertions(+), 156 deletions(-) diff --git a/jamdict/jmdict.py b/jamdict/jmdict.py index dd1c26d..487d228 100644 --- a/jamdict/jmdict.py +++ b/jamdict/jmdict.py @@ -1,54 +1,38 @@ # -*- coding: utf-8 -*- -''' -Python library for manipulating Jim 
Breen's JMdict -Latest version can be found at https://github.com/neocl/jamdict - -This package uses the [EDICT][1] and [KANJIDIC][2] dictionary files. -These files are the property of the [Electronic Dictionary Research and Development Group][3], and are used in conformance with the Group's [licence][4]. - -[1]: http://www.csse.monash.edu.au/~jwb/edict.html -[2]: http://www.csse.monash.edu.au/~jwb/kanjidic.html -[3]: http://www.edrdg.org/ -[4]: http://www.edrdg.org/edrdg/licence.html - -References: - JMDict website: - http://www.csse.monash.edu.au/~jwb/edict.html - Python documentation: - https://docs.python.org/ - PEP 257 - Python Docstring Conventions: - https://www.python.org/dev/peps/pep-0257/ - -@author: Le Tuan Anh -@license: MIT -''' - -# Copyright (c) 2016, Le Tuan Anh -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. 
+# Python library for manipulating Jim Breen's JMdict +# Latest version can be found at https://github.com/neocl/jamdict +# +# This package uses the [EDICT][1] and [KANJIDIC][2] dictionary files. +# These files are the property of the [Electronic Dictionary Research and Development Group][3], and are used in conformance with the Group's [licence][4]. +# +# [1]: http://www.csse.monash.edu.au/~jwb/edict.html +# [2]: http://www.csse.monash.edu.au/~jwb/kanjidic.html +# [3]: http://www.edrdg.org/ +# [4]: http://www.edrdg.org/edrdg/licence.html +# +# References: +# JMDict website: +# http://www.csse.monash.edu.au/~jwb/edict.html +# Python documentation: +# https://docs.python.org/ +# PEP 257 - Python Docstring Conventions: +# https://www.python.org/dev/peps/pep-0257/ +# +# @author: Le Tuan Anh +# @license: MIT ######################################################################## import os import logging -from lxml import etree +try: + from lxml import etree + _LXML_AVAILABLE = True +except Exception as e: + # logging.getLogger(__name__).debug("lxml is not available, fall back to xml.etree.ElementTree") + from xml.etree import ElementTree as etree + _LXML_AVAILABLE = False from chirptext import chio @@ -58,10 +42,13 @@ class JMDEntry(object): - ''' Entries consist of kanji elements, reading elements, + ''' Represents a dictionary Word entry. + + Entries consist of kanji elements, reading elements, general information and sense elements. Each entry must have at least one reading element and one sense element. Others are optional. 
- DTD ''' + + XML DTD ''' def __init__(self, idseq=''): # A unique numeric sequence number for each entry diff --git a/jamdict/kanjidic2.py b/jamdict/kanjidic2.py index 24a68b0..ef6120c 100644 --- a/jamdict/kanjidic2.py +++ b/jamdict/kanjidic2.py @@ -1,55 +1,35 @@ # -*- coding: utf-8 -*- -''' -Python library for manipulating Jim Breen's KanjiDic2 -Latest version can be found at https://github.com/neocl/jamdict - -This package uses the [EDICT][1] and [KANJIDIC][2] dictionary files. -These files are the property of the [Electronic Dictionary Research and Development Group][3], and are used in conformance with the Group's [licence][4]. - -[1]: http://www.csse.monash.edu.au/~jwb/edict.html -[2]: http://www.edrdg.org/kanjidic/kanjd2index.html -[3]: http://www.edrdg.org/ -[4]: http://www.edrdg.org/edrdg/licence.html - -References: - JMDict website: - http://www.csse.monash.edu.au/~jwb/edict.html - http://www.edrdg.org/kanjidic/kanjd2index.html - Python documentation: - https://docs.python.org/ - PEP 257 - Python Docstring Conventions: - https://www.python.org/dev/peps/pep-0257/ - -@author: Le Tuan Anh -@license: MIT -''' - -# Copyright (c) 2016, Le Tuan Anh -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. +# Python library for manipulating Jim Breen's KanjiDic2 +# Latest version can be found at https://github.com/neocl/jamdict +# +# This package uses the [EDICT][1] and [KANJIDIC][2] dictionary files. +# These files are the property of the [Electronic Dictionary Research and Development Group][3], and are used in conformance with the Group's [licence][4]. +# +# [1]: http://www.csse.monash.edu.au/~jwb/edict.html +# [2]: http://www.edrdg.org/kanjidic/kanjd2index.html +# [3]: http://www.edrdg.org/ +# [4]: http://www.edrdg.org/edrdg/licence.html +# +# References: +# JMDict website: +# http://www.csse.monash.edu.au/~jwb/edict.html +# http://www.edrdg.org/kanjidic/kanjd2index.html +# +# @author: Le Tuan Anh +# @license: MIT ######################################################################## import os import logging -from lxml import etree +try: + from lxml import etree + _LXML_AVAILABLE = True +except Exception as e: + # logging.getLogger(__name__).debug("lxml is not available, fall back to xml.etree.ElementTree") + from xml.etree import ElementTree as etree + _LXML_AVAILABLE = False from chirptext import chio from chirptext.sino import Radical as KangxiRadical @@ -108,7 +88,10 @@ def __getitem__(self, idx): class Character(object): - """""" + """ Represent a kanji character. + + """ + def __init__(self): """ @@ -138,7 +121,7 @@ def __str__(self): return self.literal def meanings(self, english_only=False): - ''' Accumulate all meanings ''' + ''' Accumulate all meanings as a list of string. Each string is a meaning (i.e. 
sense) ''' meanings = [] for rm in self.rm_groups: for m in rm.meanings: @@ -149,6 +132,7 @@ def meanings(self, english_only=False): @property def components(self): + ''' Kanji writing components that compose this character ''' if self.literal in krad.krad: return krad.krad[self.literal] else: diff --git a/jamdict/krad.py b/jamdict/krad.py index ade09fa..5d12e1c 100644 --- a/jamdict/krad.py +++ b/jamdict/krad.py @@ -1,47 +1,24 @@ # -*- coding: utf-8 -*- ''' -Module for retrieving kanji components (i.e. radicals) -Latest version can be found at https://github.com/neocl/jamdict - -This package uses the RADKFILE/KRADFILE[1] file. -These files are the property of the [Electronic Dictionary Research and Development Group][2], and are used in conformance with the Group's [licence][3]. - -[1]: http://www.edrdg.org/krad/kradinf.html -[2]: http://www.edrdg.org/ -[3]: http://www.edrdg.org/edrdg/licence.html - -References: - JMDict website: - http://www.csse.monash.edu.au/~jwb/edict.html - Python documentation: - https://docs.python.org/ - PEP 257 - Python Docstring Conventions: - https://www.python.org/dev/peps/pep-0257/ - -@author: Le Tuan Anh -@license: MIT +jamdict.krad is a module for retrieving kanji components (i.e. radicals) ''' -# Copyright (c) 2016, Le Tuan Anh +# Latest version can be found at https://github.com/neocl/jamdict +# +# This package uses the RADKFILE/KRADFILE[1] file. +# These files are the property of the [Electronic Dictionary Research and Development Group][2], and are used in conformance with the Group's [licence][3]. 
+# +# [1]: http://www.edrdg.org/krad/kradinf.html +# [2]: http://www.edrdg.org/ +# [3]: http://www.edrdg.org/edrdg/licence.html +# +# References: +# JMDict website: +# http://www.csse.monash.edu.au/~jwb/edict.html # -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. 
+# @author: Le Tuan Anh +# @license: MIT ######################################################################## @@ -50,7 +27,6 @@ import threading from collections import OrderedDict from collections import defaultdict as dd -from lxml import etree from chirptext import chio from chirptext.sino import Radical @@ -69,9 +45,11 @@ ######################################################################## class KRad: + ''' This class contains mapping from radicals to kanjis (radk) and kanjis to radicals (krad) + + ''' def __init__(self, **kwargs): - """ Kanji Radical management - """ + """ Kanji-Radical mapping """ self.__krad_map = None self.__radk_map = None self.__rads = {} diff --git a/jamdict/util.py b/jamdict/util.py index b056dec..75ff28d 100644 --- a/jamdict/util.py +++ b/jamdict/util.py @@ -52,16 +52,24 @@ import threading from collections import defaultdict as dd from collections import OrderedDict +from typing import List, Tuple + from chirptext.deko import HIRAGANA, KATAKANA from . import config -from .jmdict import JMDictXMLParser +from .jmdict import JMDictXMLParser, JMDEntry from .krad import KRad from .jmdict_sqlite import JMDictSQLite -from .kanjidic2 import Kanjidic2XMLParser +from .kanjidic2 import Kanjidic2XMLParser, Character from .kanjidic2_sqlite import KanjiDic2SQLite from .jmnedict_sqlite import JMNEDictSQLite +try: + import jamdict_data + _JAMDICT_DATA_AVAILABLE = True +except: + _JAMDICT_DATA_AVAILABLE = False + ######################################################################## @@ -71,14 +79,76 @@ def getLogger(): ######################################################################## + +EntryList = List[JMDEntry] +CharacterList = List[Character] + + class LookupResult(object): + ''' Contain lookup results (words, Kanji characters, or named entities) from Jamdict. 
+ + A typical jamdict lookup is like this: + + >>> result = jam.lookup('食べ%る') + + The command above returns a :any:`LookupResult` object which contains found words (:any:`entries`), + kanji characters (:any:`chars`), and named entities (:any:`names`). + ''' + def __init__(self, entries, chars, names=None): - self.entries = entries if entries else [] - self.chars = chars if chars else [] - self.names = names if names else [] + self.__entries = entries if entries else [] + self.__chars = chars if chars else [] + self.__names = names if names else [] + + @property + def entries(self): + ''' A list of words entries + + :returns: a list of :class:`JMDEntry <jamdict.jmdict.JMDEntry>` object + :rtype: EntryList + ''' + return self.__entries + + @entries.setter + def entries(self, values): + self.__entries = values + + @property + def chars(self): + ''' A list of found kanji characters + + :returns: a list of :class:`Character <jamdict.kanjidic2.Character>` object + :rtype: CharacterList + ''' + return self.__chars + + @chars.setter + def chars(self, values): + self.__chars = values + + @property + def names(self): + ''' A list of found named entities + + :returns: a list of :class:`JMDEntry <jamdict.jmdict.JMDEntry>` object + :rtype: EntryList + ''' + return self.__names + + @names.setter + def names(self, values): + self.__names = values + def text(self, compact=True, entry_sep='。', separator=' | ', no_id=False, with_chars=True): + ''' Generate a text string that contains all found words, characters, and named entities. + + :param compact: Make the output string more compact (fewer info, fewer whitespaces, etc.) + :param no_id: Do not include jamdict's internal object IDs (for direct query via API) + :param with_chars: Include characters information + :returns: A formatted string ready for display + ''' output = [] if self.entries: entry_txts = [] @@ -132,6 +202,19 @@ def __init__(self, data_source, setup_script=None, setup_file=None, *args, **kwa class Jamdict(object): + ''' Main entry point to access all available dictionaries in jamdict. 
+ + >>> from jamdict import Jamdict + >>> jam = Jamdict() + >>> result = jam.lookup('食べ%る') + # print all word entries + >>> for entry in result.entries: + >>> print(entry) + # print all related characters + >>> for c in result.chars: + >>> print(repr(c)) + ''' + def __init__(self, db_file=None, kd2_file=None, jmd_xml_file=None, kd2_xml_file=None, auto_config=True, auto_expand=True, reuse_ctx=True, @@ -139,16 +222,28 @@ def __init__(self, db_file=None, kd2_file=None, **kwargs): # file paths configuration self.auto_expand = auto_expand - self.db_file = db_file if db_file else config.get_file('JAMDICT_DB') if auto_config else None - self.kd2_file = kd2_file if kd2_file else config.get_file('JAMDICT_DB') if auto_config else None - self.jmnedict_file = jmnedict_file if jmnedict_file else config.get_file('JAMDICT_DB') if auto_config else None - if not self.db_file or not os.path.isfile(self.db_file): - getLogger().warning("JAMDICT_DB could NOT be found. Searching will be extremely slow. Please run `python3 -m jamdict.tools import` first") - if not self.kd2_file or not os.path.isfile(self.kd2_file): - getLogger().warning("Kanjidic2 database could NOT be found. Searching will be extremely slow. Please run `python3 -m jamdict.tools import` first") self.jmd_xml_file = jmd_xml_file if jmd_xml_file else config.get_file('JMDICT_XML') if auto_config else None self.kd2_xml_file = kd2_xml_file if kd2_xml_file else config.get_file('KD2_XML') if auto_config else None self.jmnedict_xml_file = jmnedict_xml_file if jmnedict_xml_file else config.get_file('JMNEDICT_XML') if auto_config else None + + self.db_file = db_file if db_file else config.get_file('JAMDICT_DB') if auto_config else None + if not self.db_file or not os.path.isfile(self.db_file): + if _JAMDICT_DATA_AVAILABLE: + self.db_file = jamdict_data.JAMDICT_DB_PATH + elif self.jmd_xml_file and os.path.isfile(self.jmd_xml_file): + getLogger().warning("JAMDICT_DB could NOT be found. Searching will be extremely slow. 
Please run `python3 -m jamdict import` first") + self.kd2_file = kd2_file if kd2_file else config.get_file('JAMDICT_DB') if auto_config else None + if not self.kd2_file or not os.path.isfile(self.kd2_file): + if _JAMDICT_DATA_AVAILABLE: + self.kd2_file = None # jamdict_data.JAMDICT_DB_PATH + elif self.kd2_xml_file and os.path.isfile(self.kd2_xml_file): + getLogger().warning("Kanjidic2 database could NOT be found. Searching will be extremely slow. Please run `python3 -m jamdict import` first") + self.jmnedict_file = jmnedict_file if jmnedict_file else config.get_file('JAMDICT_DB') if auto_config else None + if not self.jmnedict_file or not os.path.isfile(self.jmnedict_file): + if _JAMDICT_DATA_AVAILABLE: + self.jmnedict_file = None # jamdict_data.JAMDICT_DB_PATH + elif self.jmnedict_xml_file and os.path.isfile(self.jmnedict_xml_file): + getLogger().warning("JMNE database could NOT be found. Searching will be extremely slow. Please run `python3 -m jamdict import` first") # data sources self._db_sqlite = None self._kd2_sqlite = None @@ -160,6 +255,11 @@ def __init__(self, db_file=None, kd2_file=None, self.reuse_ctx = reuse_ctx self.__jm_ctx = None # for reusing database context + @property + def ready(self): + ''' Check if Jamdict database is available ''' + return os.path.isfile(self.db_file) and self.jmdict is not None + def __del__(self): if self.__jm_ctx is not None: try: @@ -230,7 +330,7 @@ def jmdict(self): @property def kd2(self): if self._kd2_sqlite is None: - if self.kd2_file is not None: + if self.kd2_file is not None and os.path.isfile(self.kd2_file): with threading.Lock(): self._kd2_sqlite = KanjiDic2SQLite(self.kd2_file, auto_expand_path=self.auto_expand) else: @@ -259,7 +359,12 @@ def jmdict_xml(self): @property def krad(self): - ''' Kanji to radicals map ''' + ''' Break a kanji down to writing components + + >>> jam = Jamdict() + >>> print(jam.krad['雲']) + ['一', '雨', '二', '厶'] + ''' if not self.__krad_map: with threading.Lock(): self.__krad_map = KRad() 
@@ -267,7 +372,12 @@ def krad(self): @property def radk(self): - ''' Radical to kanji map ''' + ''' Find all kanji with a writing component + + >>> jam = Jamdict() + >>> print(jam.radk['鼎']) + {'鼏', '鼒', '鼐', '鼎', '鼑'} + ''' if not self.__krad_map: with threading.Lock(): self.__krad_map = KRad() @@ -302,6 +412,9 @@ def has_jmne(self): return None def is_available(self): + # this function is for developer only + # don't expose it to the public + # ready should be used instead return (self.db_file is not None or self.jmd_xml_file is not None or self.kd2_file is not None or self.kd2_xml_file is not None or self.jmnedict_file is not None or self.jmnedict_xml_file is not None) @@ -348,15 +461,23 @@ def get_entry(self, idseq): raise LookupError("There is no backend data available") def lookup(self, query, strict_lookup=False, lookup_chars=True, ctx=None, lookup_ne=True, **kwargs): - ''' Search words and characters and return a LookupResult object. + ''' Search words, characters, and characters. Keyword arguments: - query --- Text to query, may contains wildcard characters - exact_match --- use exact SQLite matching (==) instead of wildcard matching (LIKE) - strict_lookup --- Only look up the Kanji characters in query (i.e. discard characters from variants) - lookup_chars --- set lookup_chars to False to disable character lookup - ctx --- Database access context, can be reused for better performance - lookup_ne --- Lookup name-entities + + :param query: Text to query, may contains wildcard characters. Use `?` for 1 exact character and `%` to match any number of characters. + :param strict_lookup: only look up the Kanji characters in query (i.e. discard characters from variants) + :type strict_lookup: bool + :param: lookup_chars: set lookup_chars to False to disable character lookup + :type lookup_chars: bool + :param: ctx: database access context, can be reused for better performance. 
Normally users do not have to touch this and database connections will be reused by default. + :param lookup_ne: set lookup_ne to False to disable name-entities lookup + :type lookup_ne: bool + :returns: Return a LookupResult object. + :rtype: :class:`jamdict.util.LookupResult` + + >>> # match any word that starts with "食べ" and ends with "る" (anything from between is fine) + >>> results = jam.lookup('食べ%る') ''' if not self.is_available(): raise LookupError("There is no backend data available") From 7cdeefd1e98cfb46b0acddc1d1ef89f392329d38 Mon Sep 17 00:00:00 2001 From: Le Tuan Anh Date: Thu, 15 Apr 2021 22:50:02 +0800 Subject: [PATCH 06/11] add config command, more info, detect if DB is available before lookup --- jamdict/tools.py | 53 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/jamdict/tools.py b/jamdict/tools.py index bfe98cd..cb147d8 100755 --- a/jamdict/tools.py +++ b/jamdict/tools.py @@ -39,10 +39,12 @@ import os import json +import logging from chirptext import confirm, TextReport, Timer from chirptext.cli import CLIApp, setup_logging +import jamdict from jamdict import Jamdict from jamdict import config from jamdict import version_info @@ -63,6 +65,10 @@ setup_logging(os.path.join(config.home_dir(), 'logging.json'), 'logs') +def getLogger(): + return logging.getLogger(__name__) + + # ------------------------------------------------------------------------------- # Functions # ------------------------------------------------------------------------------- @@ -164,17 +170,21 @@ def dump_result(results, report=None): def lookup(cli, args): '''Lookup words by kanji/kana''' jam = get_jam(cli, args) - results = jam.lookup(args.query, strict_lookup=args.strict) - report = TextReport(args.output) - if args.format == 'json': - report.print(json.dumps(results.to_json(), - ensure_ascii=args.ensure_ascii, - indent=args.indent if args.indent else None)) - else: - if args.compact: - 
report.print(results.text(separator='\n------\n', entry_sep='\n')) + if jam.ready: + results = jam.lookup(args.query, strict_lookup=args.strict) + report = TextReport(args.output) + if args.format == 'json': + report.print(json.dumps(results.to_json(), + ensure_ascii=args.ensure_ascii, + indent=args.indent if args.indent else None)) else: - dump_result(results, report=report) + if args.compact: + report.print(results.text(separator='\n------\n', entry_sep='\n')) + else: + dump_result(results, report=report) + else: + getLogger().warning(f"Jamdict database is not available.\nThere are 3 ways to install data: \n 1) install jamdict_data via PyPI using `pip install jamdict_data` \n 2) download prebuilt dictionary database file from: {jamdict.__url__}, \n 3) or build your own database file from XML source files.") + def file_status(file_path): @@ -188,13 +198,20 @@ def show_info(cli, args): output.print("Jamdict " + version_info.__version__) output.print(version_info.__description__) output.header("Basic configuration") - output.print("JAMDICT_HOME : {}".format(config.home_dir())) - output.print("Config file location: {}".format(config._get_config_manager().locate_config())) - output.header("Data files") + output.print(f"JAMDICT_HOME : {config.home_dir()}") + output.print(f"jamdict_data availability: {jamdict.util._JAMDICT_DATA_AVAILABLE}") + _config_path = config._get_config_manager().locate_config() + if not _config_path: + _config_path = "Not available.\n Run `python3 -m jamdict config` to create configuration file if needed." 
+ output.print(f"Config file location : {_config_path}") + + output.header("Custom data files") output.print("Jamdict DB location: {} - {}".format(args.jdb, file_status(args.jdb))) output.print("JMDict XML file : {} - {}".format(args.jmdxml, file_status(args.jmdxml))) output.print("KanjiDic2 XML file : {} - {}".format(args.kd2xml, file_status(args.kd2xml))) output.print("JMnedict XML file : {} - {}".format(args.jmnexml, file_status(args.jmnexml))) + output.header("Others") + output.print(f"lxml availability: {jamdict.jmdict._LXML_AVAILABLE}") def show_version(cli, args): @@ -205,6 +222,12 @@ def show_version(cli, args): print("Jamdict {}".format(version_info.__version__)) +def config_jamdict(cli, args): + ''' Create configuration file ''' + jamdict.config._ensure_config() + show_info(cli, args) + + # ------------------------------------------------------------------------------- # Main # ------------------------------------------------------------------------------- @@ -237,6 +260,10 @@ def main(): version_task = app.add_task('version', func=show_version) add_data_config(version_task) + # create config file + config_task = app.add_task('config', func=config_jamdict) + add_data_config(config_task) + # look up task lookup_task = app.add_task('lookup', func=lookup) lookup_task.add_argument('query', help='kanji/kana') From caedade2a362c55a09c04c497e7e993c8c80bc2a Mon Sep 17 00:00:00 2001 From: Le Tuan Anh Date: Thu, 15 Apr 2021 22:50:35 +0800 Subject: [PATCH 07/11] make lxml optional --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 30154ce..dfd161d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ -lxml chirptext >= 0.1a19 puchikarui >= 0.1a3 From e98043eb8b4fc8534e8278d57bf4b93c82767b3e Mon Sep 17 00:00:00 2001 From: Le Tuan Anh Date: Thu, 15 Apr 2021 22:50:44 +0800 Subject: [PATCH 08/11] add document link --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md 
b/README.md index c20794a..de3bb80 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ Python library for manipulating Jim Breen's JMdict & KanjiDic2 +[![ReadTheDocs Badge](https://readthedocs.org/projects/jamdict/badge/?version=latest&style=plastic)](https://jamdict.readthedocs.io/) + # Main features * Support querying different Japanese language resources From 83b0f5765498cd5a8e4b35d0543b292062f62860 Mon Sep 17 00:00:00 2001 From: Le Tuan Anh Date: Thu, 15 Apr 2021 22:50:51 +0800 Subject: [PATCH 09/11] note changes --- CHANGES.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 31eac65..eeb8621 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,8 @@ +2021-04-15 + - Make `lxml` optional + - Data package can be installed via PyPI with `jamdict_data` package + - Make configuration file optional as data files can be installed via PyPI. + 2020-05-31 - [Version 0.1a7] - Added Japanese Proper Names Dictionary (JMnedict) support From a078939be5cad1e5a7a8cd1f60532be62c5f976f Mon Sep 17 00:00:00 2001 From: Le Tuan Anh Date: Thu, 15 Apr 2021 22:51:05 +0800 Subject: [PATCH 10/11] initialize documentation --- docs/Makefile | 20 ++++++ docs/api.rst | 27 ++++++++ docs/conf.py | 53 +++++++++++++++ docs/index.rst | 150 ++++++++++++++++++++++++++++++++++++++++++ docs/install.rst | 70 ++++++++++++++++++++ docs/make.bat | 35 ++++++++++ docs/recipes.rst | 100 ++++++++++++++++++++++++++++ docs/requirements.txt | 1 + docs/tutorials.rst | 53 +++++++++++++++ 9 files changed, 509 insertions(+) create mode 100644 docs/Makefile create mode 100644 docs/api.rst create mode 100644 docs/conf.py create mode 100644 docs/index.rst create mode 100644 docs/install.rst create mode 100644 docs/make.bat create mode 100644 docs/recipes.rst create mode 100644 docs/requirements.txt create mode 100644 docs/tutorials.rst diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d4bb2cb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal 
makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 0000000..2f0a210 --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,27 @@ +jamdict APIs +============ + +An overview of jamdict modules. + +.. module:: jamdict + +.. autoclass:: jamdict.util.LookupResult + :members: + :member-order: groupwise + +.. autoclass:: jamdict.util.Jamdict + :members: + :member-order: groupwise + :exclude-members: get_ne, has_jmne, import_data, jmnedict + +.. module:: jamdict.jmdict + +.. autoclass:: JMDEntry + :members: + +.. module:: jamdict.kanjidic2 + +.. autoclass:: Character + :members: + +.. automodule:: jamdict.krad diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..48496c5 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,53 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. 
+# +import os +import sys +sys.path.insert(0, os.path.abspath('../')) + + +# -- Project information ----------------------------------------------------- + +project = 'jamdict' +copyright = '2021, Le Tuan Anh' +author = 'Le Tuan Anh' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode', 'sphinx.ext.doctest'] +# -- Highlight code block ----------------- +pygments_style = 'sphinx' + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'bizstyle' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..a2d76b3 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,150 @@ + +Jamdict's documentation! +======================== + +`Jamdict `_ is a Python 3 library for manipulating Jim Breen's JMdict, KanjiDic2, JMnedict and kanji-radical mappings. 
+ +Main features +------------- + +- Support querying different Japanese language resources + + - Japanese-English dictionary JMDict + - Kanji dictionary KanjiDic2 + - Kanji-radical and radical-kanji maps KRADFILE/RADKFILE + - Japanese Proper Names Dictionary (JMnedict) + +- Data are stored using SQLite database +- Console lookup tool +- jamdol (jamdol-flask) - a Python/Flask server that provides Jamdict + lookup via REST API (experimental state) + +:ref:`Contributors are welcome! 🙇 ` + +Installation +------------ + +Jamdict is `available on PyPI `_ and +can be installed using pip command + +.. code:: bash + + pip install jamdict jamdict_data + +Sample jamdict Python code +-------------------------- + +Looking up words + + >>> from jamdict import Jamdict + >>> jam = Jamdict() + >>> result = jam.lookup('はな') + >>> for word in result.entries: + ... print(word) + ... + [id#1194500] はな (花) : 1. flower/blossom/bloom/petal ((noun (common) (futsuumeishi))) 2. cherry blossom 3. beauty 4. blooming (esp. of cherry blossoms) 5. ikebana 6. Japanese playing cards 7. (the) best + [id#1486720] はな (鼻) : nose ((noun (common) (futsuumeishi))) + [id#1581610] はし (端) : 1. end (e.g. of street)/tip/point/edge/margin ((noun (common) (futsuumeishi))) 2. beginning/start/first 3. odds and ends/scrap/odd bit/least + [id#1634180] はな (洟) : snivel/nasal mucus/snot ((noun (common) (futsuumeishi))) + +Looking up kanji characters + + >>> for c in result.chars: + ... print(repr(c)) + ... + 花:7:flower + 華:10:splendor,flower,petal,shine,luster,ostentatious,showy,gay,gorgeous + 鼻:14:nose,snout + 端:14:edge,origin,end,point,border,verge,cape + 洟:9:tear,nasal discharge + +Looking up named entities + + >>> result = jam.lookup('ディズニー%') + >>> for name in result.names: + ... print(name) + ... + [id#5053163] ディズニー : Disney (family or surname/company name) + [id#5741091] ディズニーランド : Disneyland (place name) + +See :ref:`recipes` for more sample code. 
+ +Command line tools +------------------ + +Jamdict can be used from the command line. + +.. code:: bash + + python3 -m jamdict lookup 言語学 + ======================================== + Found entries + ======================================== + Entry: 1264430 | Kj: 言語学 | Kn: げんごがく + -------------------- + 1. linguistics ((noun (common) (futsuumeishi))) + + ======================================== + Found characters + ======================================== + Char: 言 | Strokes: 7 + -------------------- + Readings: yan2, eon, 언, Ngôn, Ngân, ゲン, ゴン, い.う, こと + Meanings: say, word + Char: 語 | Strokes: 14 + -------------------- + Readings: yu3, yu4, eo, 어, Ngữ, Ngứ, ゴ, かた.る, かた.らう + Meanings: word, speech, language + Char: 学 | Strokes: 8 + -------------------- + Readings: xue2, hag, 학, Học, ガク, まな.ぶ + Meanings: study, learning, science + + No name was found. + +To show help you may use + +.. code:: bash + + python3 -m jamdict --help + +Documentation +------------- + +.. toctree:: + :maxdepth: 2 + + install + tutorials + recipes + api + +Other info +========== + +.. _contributors: + +Contributors +------------ + +- `Matteo Fumagalli `__ +- `Reem Alghamdi `__ + +Useful links +------------ + +- jamdict on PyPI: https://pypi.org/project/jamdict/ +- jamdict source code: https://github.com/neocl/jamdict/ +- Documentation: https://jamdict.readthedocs.io/ +- Dictionaries + - JMdict: http://edrdg.org/jmdict/edict_doc.html + - kanjidic2: https://www.edrdg.org/wiki/index.php/KANJIDIC_Project + - JMnedict: https://www.edrdg.org/enamdict/enamdict_doc.html + - KRADFILE: http://www.edrdg.org/krad/kradinf.html + +Indices and tables +------------------ + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/install.rst b/docs/install.rst new file mode 100644 index 0000000..7a75aaf --- /dev/null +++ b/docs/install.rst @@ -0,0 +1,70 @@ +Installation +============= + +jamdict and jamdict dictionary data are both available on PyPI and can be installed using `pip`. + +.. 
code-block:: bash + + pip install --user jamdict jamdict_data + # pip script sometimes doesn't work properly + # so you may want to try this instead + python3 -m pip install jamdict jamdict_data + +Download database file manually +------------------------------- + +This should no longer be necessary as of version 0.1a8 with the release of the `jamdict_data `_ package on PyPI. +If for some reason you want to download and install jamdict database by yourself, here are the steps: + +1. Download the official, pre-compiled jamdict database + (``jamdict-0.1a7.tar.xz``) from Google Drive + https://drive.google.com/drive/u/1/folders/1z4zF9ImZlNeTZZplflvvnpZfJp3WVLPk +2. Extract and copy ``jamdict.db`` to jamdict data folder (defaulted to + ``~/.jamdict/data/jamdict.db``) +3. To know where to copy data files you can use `python3 -m jamdict info` command via a terminal: + +.. code:: bash + + python3 -m jamdict info + # Jamdict 0.1a8 + # Python library for manipulating Jim Breen's JMdict, KanjiDic2, KRADFILE and JMnedict + # + # Basic configuration + # ------------------------------------------------------------ + # JAMDICT_HOME : ~/local/jamdict + # jamdict_data availability: False + # Config file location : /home/tuananh/.jamdict/config.json + # + # Custom Data files + # ------------------------------------------------------------ + # Jamdict DB location: ~/local/jamdict/data/jamdict.db - [OK] + # JMDict XML file : ~/local/jamdict/data/JMdict_e.gz - [OK] + # KanjiDic2 XML file : ~/local/jamdict/data/kanjidic2.xml.gz - [OK] + # JMnedict XML file : ~/local/jamdict/data/JMnedict.xml.gz - [OK] + # + # Others + # ------------------------------------------------------------ + # lxml availability: False + +Build database file from source +------------------------------- + +Normal users who just want to look up the dictionaries do not have to do this. +If you are a developer and want to build jamdict database from source, +copy the dictionary source files to jamdict data folder. 
+The original XML files can be downloaded either from the official website +https://www.edrdg.org/ or from `this jamdict Google Drive folder `_. + +To find out where to copy the files or whether they are recognised by jamdict, +you may use the command `python3 -m jamdict info` as in the section above. + +You should make sure that all files under the section `Custom Data files` are marked [OK]. +After that you should be able to build the database with the command: + +.. code:: bash + + python3 -m jamdict import + +Note on XML parser: jamdict will use `lxml` instead of Python 3 default `xml` when it is available. + + diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..2119f51 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/recipes.rst b/docs/recipes.rst new file mode 100644 index 0000000..5bec451 --- /dev/null +++ b/docs/recipes.rst @@ -0,0 +1,100 @@ +.. _recipes: + +Common Recipes +============== + +- Search words using wildcards. +- Search for kanji characters. +- Decompose kanji characters into components, or search kanji characters by components. +- Search for named entities. 
+ +👉 ⚠️ THIS SECTION IS STILL UNDER CONSTRUCTION ⚠️ + +All code here assumes that you have created a Jamdict object named :samp:`jam`, like this + + >>> from jamdict import Jamdict + >>> jam = Jamdict() + +Kanjis and radical/components (KRAD/RADK mappings) +-------------------------------------------------- + +Jamdict has built-in support for KRAD/RADK (i.e. kanji-radical and +radical-kanji mapping). The terminology of radicals/components used by +Jamdict can be different from elsewhere. + +- A radical in Jamdict is a principal component, each character has + only one radical. +- A character may be decomposed into several writing components. + +By default jamdict provides two maps: + +- jam.krad is a Python dict that maps characters to lists of components. +- jam.radk is a Python dict that maps each available component to a + list of characters. + +.. code:: python + + # Find all writing components (often called "radicals") of the character 雲 + print(jam.krad['雲']) + # ['一', '雨', '二', '厶'] + + # Find all characters with the component 鼎 + chars = jam.radk['鼎'] + print(chars) + # {'鼏', '鼒', '鼐', '鼎', '鼑'} + + # look up the characters info + result = jam.lookup(''.join(chars)) + for c in result.chars: + print(c, c.meanings()) + # 鼏 ['cover of tripod cauldron'] + # 鼒 ['large tripod cauldron with small'] + # 鼐 ['incense tripod'] + # 鼎 ['three legged kettle'] + # 鼑 [] + +Finding name entities +--------------------- + +.. 
code:: python + + # Find all names that contain the string 鈴木 + result = jam.lookup('%鈴木%') + for name in result.names: + print(name) + + # [id#5025685] キューティーすずき (キューティー鈴木) : Kyu-ti- Suzuki (1969.10-) (full name of a particular person) + # [id#5064867] パパイヤすずき (パパイヤ鈴木) : Papaiya Suzuki (full name of a particular person) + # [id#5089076] ラジカルすずき (ラジカル鈴木) : Rajikaru Suzuki (full name of a particular person) + # [id#5259356] きつねざきすずきひなた (狐崎鈴木日向) : Kitsunezakisuzukihinata (place name) + # [id#5379158] こすずき (小鈴木) : Kosuzuki (family or surname) + # [id#5398812] かみすずき (上鈴木) : Kamisuzuki (family or surname) + # [id#5465787] かわすずき (川鈴木) : Kawasuzuki (family or surname) + # [id#5499409] おおすずき (大鈴木) : Oosuzuki (family or surname) + # [id#5711308] すすき (鈴木) : Susuki (family or surname) + # ... + +Exact matching +-------------- + +Use exact matching for faster search + +.. code:: python + + # Find an entry (word, named entity) by idseq + result = jam.lookup('id#5711308') + print(result.names[0]) + # [id#5711308] すすき (鈴木) : Susuki (family or surname) + result = jam.lookup('id#1467640') + print(result.entries[0]) + # ねこ (猫) : 1. cat 2. shamisen 3. geisha 4. wheelbarrow 5. clay bed-warmer 6. bottom/submissive partner of a homosexual relationship + + # use exact matching to increase searching speed (thanks to @reem-codes) + result = jam.lookup('猫') + + for entry in result.entries: + print(entry) + + # [id#1467640] ねこ (猫) : 1. cat ((noun (common) (futsuumeishi))) 2. shamisen 3. geisha 4. wheelbarrow 5. clay bed-warmer 6. 
bottom/submissive partner of a homosexual relationship + # [id#2698030] ねこま (猫) : cat ((noun (common) (futsuumeishi))) + diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..da8ebf8 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1 @@ +jamdict diff --git a/docs/tutorials.rst b/docs/tutorials.rst new file mode 100644 index 0000000..252d773 --- /dev/null +++ b/docs/tutorials.rst @@ -0,0 +1,53 @@ +Tutorials +========= + +Getting started +--------------- + +Just install :keyword:`jamdict` and :keyword:`jamdict_data` packages via pip and you are ready to go. + +.. code:: python + + from jamdict import Jamdict + jam = Jamdict() + +The most useful function is :func:`jamdict.util.Jamdict.lookup`. +For example: + +.. code:: python + + # use wildcard matching to find any word, or Kanji character, or name + # that starts with 食べ and ends with る + result = jam.lookup('食べ%る') + +To access the result object you may use: + +.. code:: python + + # print all word entries + for entry in result.entries: + print(entry) + + # [id#1358280] たべる (食べる) : 1. to eat ((Ichidan verb|transitive verb)) 2. to live on (e.g. a salary)/to live off/to subsist on + # [id#1358300] たべすぎる (食べ過ぎる) : to overeat ((Ichidan verb|transitive verb)) + # [id#1852290] たべつける (食べ付ける) : to be used to eating ((Ichidan verb|transitive verb)) + # [id#2145280] たべはじめる (食べ始める) : to start eating ((Ichidan verb)) + # [id#2449430] たべかける (食べ掛ける) : to start eating ((Ichidan verb)) + # [id#2671010] たべなれる (食べ慣れる) : to be used to eating/to become used to eating/to be accustomed to eating/to acquire a taste for ((Ichidan verb)) + # [id#2765050] たべられる (食べられる) : 1. to be able to eat ((Ichidan verb|intransitive verb)) 2. 
to be edible/to be good to eat ((pre-noun adjectival (rentaishi))) + # [id#2795790] たべくらべる (食べ比べる) : to taste and compare several dishes (or foods) of the same type ((Ichidan verb|transitive verb)) + # [id#2807470] たべあわせる (食べ合わせる) : to eat together (various foods) ((Ichidan verb)) + + # print all related characters + for c in result.chars: + print(repr(c)) + + # 食:9:eat,food + # 喰:12:eat,drink,receive (a blow),(kokuji) + # 過:12:overdo,exceed,go beyond,error + # 付:5:adhere,attach,refer to,append + # 始:8:commence,begin + # 掛:11:hang,suspend,depend,arrive at,tax,pour + # 慣:14:accustomed,get used to,become experienced + # 比:4:compare,race,ratio,Philippines + # 合:6:fit,suit,join,0.1 From a33056ab15efde97f8eae11ad7f5b182a8baeb71 Mon Sep 17 00:00:00 2001 From: Le Tuan Anh Date: Thu, 15 Apr 2021 22:51:33 +0800 Subject: [PATCH 11/11] pump version to 0.1a8 --- jamdict/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jamdict/__version__.py b/jamdict/__version__.py index d35fcf3..78d0196 100644 --- a/jamdict/__version__.py +++ b/jamdict/__version__.py @@ -10,6 +10,6 @@ __url__ = "https://github.com/neocl/jamdict" __maintainer__ = "Le Tuan Anh" __version_major__ = "0.1" -__version__ = "{}a7".format(__version_major__) +__version__ = "{}a8".format(__version_major__) __version_long__ = "{} - Alpha".format(__version_major__) __status__ = "Prototype"