diff --git a/Dict - Lookup Word/actions.py b/Dict - Lookup Word/actions.py index df208e0..0a779c6 100644 --- a/Dict - Lookup Word/actions.py +++ b/Dict - Lookup Word/actions.py @@ -5,17 +5,14 @@ import os import re import cndict -import plistlib +import json -if len(sys.argv) != 2: - sys.exit(1) +from cache import Cache +from alfredplist import AlfredPlist -def get_keyword(): - plist = plistlib.readPlist(os.path.abspath('./info.plist')) - for item in plist['objects']: - if 'keyword' in item['config']: - return item['config']['keyword'] +if len(sys.argv) != 2: + sys.exit(1) def restore(arg): @@ -29,17 +26,30 @@ def shell_exec(cmd, arg, escape=False): os.system(cmd.format("'{}'".format(arg.replace("'", "\\'")))) -match = re.match(r'^(:.*?) ([@|>]) (.*?)$', sys.argv[1]) +plist = AlfredPlist() +plist.read(os.path.abspath('./info.plist')) + +match = re.match(r'^:(.*?) ([@|>]) (.*?)$', sys.argv[1]) if match: - keyword = get_keyword() - if keyword: - restore('{} {}'.format(keyword, match.group(1))) + command = match.group(1).strip() + if command == 'clean': + base_dir = os.path.expanduser('~/Library/Caches/com.runningwithcrayons.Alfred-2/Workflow Data/') + dict_cache = Cache(os.path.join(base_dir, plist.get_bundleid())) + dict_cache.clean() + elif command == 'config': + shell_exec('open {}', os.path.abspath('./config.json')) + elif command == 'update': + config_data = open(os.path.abspath('./config.json')).read() + config = json.loads(re.sub(r'//.*', '', config_data)) + plist.set_keyword(config['keyword']) + plist.set_keymap(config['keymap']) + plist.write(os.path.abspath('./info.plist')) else: match = re.match(r'^(.*?) @ (.*?) 
(\| (.*) )?([@|>]) (.*?)$', sys.argv[1]) if match: word, dictionary, _, item, operator, command = match.groups() if operator == '@': - keyword = get_keyword() + keyword = plist.get_keyword() if keyword: if item: new_word = cndict.extract(dictionary, word, item) diff --git a/Dict - Lookup Word/alfredplist.py b/Dict - Lookup Word/alfredplist.py new file mode 100644 index 0000000..a868cff --- /dev/null +++ b/Dict - Lookup Word/alfredplist.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import re +import plistlib +import cndict + + +class AlfredPlist(object): + def __init__(self): + self.__modifier_map = { + 0: 'none', + 524288: 'alt', + 262144: 'ctrl', + 131072: 'shift', + 1048576: 'cmd', + 8388608: 'fn' + } + self.__plist = None + self.__base = None + self.__branches = {} + + def read(self, path): + self.__plist = plistlib.readPlist(path) + for obj in self.__plist['objects']: + if 'keyword' in obj['config']: + self.__base = obj + uid = self.__base['uid'] + for conn in self.__plist['connections'][uid]: + self.__branches[conn['destinationuid']] = [conn, None] + break + for uid, pair in self.__branches.iteritems(): + for obj in self.__plist['objects']: + if uid == obj['uid']: + pair[1] = obj + break + + def write(self, path): + if self.__plist: + plistlib.writePlist(self.__plist, path) + + def get_keyword(self): + if self.__base: + return self.__base['config']['keyword'] + return '' + + def set_keyword(self, value): + if self.__base: + self.__base['config']['keyword'] = value + + def get_keymap(self): + keymap = {} + for conn, child in self.__branches.values(): + modifier = self.__modifier_map[conn['modifiers']] + match = re.search(r'[@|>] (\w+)"', child['config']['script']) + if match: + keymap[modifier] = match.group(1) + return keymap + + def set_keymap(self, value): + keymap = value + for conn, child in self.__branches.values(): + modifier = self.__modifier_map[conn['modifiers']] + if modifier in keymap: + child['config']['script'] = 
re.sub(r'(?<=[@|>] )\w+(?=")', keymap[modifier], + child['config']['script']) + if modifier != 'none': + dict_name = cndict.get_full_name(keymap[modifier]) + conn['modifiersubtext'] = re.sub(r'(?<=in )\w+(?= dict)', dict_name, + conn['modifiersubtext']) + + def get_bundleid(self): + if self.__plist: + return self.__plist['bundleid'] + return '' diff --git a/Dict - Lookup Word/cndict/__init__.py b/Dict - Lookup Word/cndict/__init__.py index 2129453..8202a11 100644 --- a/Dict - Lookup Word/cndict/__init__.py +++ b/Dict - Lookup Word/cndict/__init__.py @@ -1,19 +1,26 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import sys import importlib from utils import * -def _load_module(dictionary): +def get_full_name(dictionary): dict_name_map = { - 'sys': 'system', + 'nj': 'oxford', + 'ld': 'landau', 'yd': 'youdao', 'cb': 'iciba', 'bd': 'baidu', - 'by': 'bing' + 'by': 'bing', + 'hc': 'dictcn' } - module_name = dict_name_map.get(dictionary, dictionary) + 'dict' + return dict_name_map.get(dictionary, dictionary) + + +def _load_module(dictionary): + module_name = get_full_name(dictionary) try: return importlib.import_module('.' 
+ module_name, __name__) except ImportError: diff --git a/Dict - Lookup Word/cndict/__main__.py b/Dict - Lookup Word/cndict/__main__.py new file mode 100644 index 0000000..1fead83 --- /dev/null +++ b/Dict - Lookup Word/cndict/__main__.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +import os + +sys.path.append(os.path.join(os.path.dirname(__file__), '..')) +from cndict import lookup + +if len(sys.argv) < 3: + print 'usage: python cndict ' + sys.exit(1) + +result = lookup(*(sys.argv[1:])) +print '\n'.join(result) diff --git a/Dict - Lookup Word/cndict/baidudict.py b/Dict - Lookup Word/cndict/baidu.py similarity index 98% rename from Dict - Lookup Word/cndict/baidudict.py rename to Dict - Lookup Word/cndict/baidu.py index 622286c..4e598b6 100644 --- a/Dict - Lookup Word/cndict/baidudict.py +++ b/Dict - Lookup Word/cndict/baidu.py @@ -7,7 +7,7 @@ from utils import * -def lookup(word): +def lookup(word, *args): params = { 'client_id': 'Gh4UZOrtK9cUba2MW4SuTS3T', 'q': word diff --git a/Dict - Lookup Word/cndict/bing.py b/Dict - Lookup Word/cndict/bing.py new file mode 100644 index 0000000..42ed61b --- /dev/null +++ b/Dict - Lookup Word/cndict/bing.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import urllib +import urllib2 +import gzip +import re +import StringIO +from utils import * + + +def lookup(word, wap_page=True, *args): + params = {'q': word} + if wap_page: + params['view'] = 'wap' + url = 'http://dict.bing.com.cn/' + else: + url = 'http://www.bing.com/dict/search' + url = '{}?{}'.format(url, urllib.urlencode(params)) + try: + request = urllib2.Request(url) + request.add_header('Accept-Encoding', 'gzip') + response = urllib2.urlopen(request) + data = response.read() + except: + raise DictLookupError('error to fetch data.') + + if response.info().get('Content-Encoding') == 'gzip': + gzip_file = gzip.GzipFile(fileobj=StringIO.StringIO(data)) + data = gzip_file.read() + + result = [] + is_eng = 
is_english(word) + + if wap_page: + match = re.search(r'
.*?{}
(.*?)
web\.
'.format( + r'.*?US:\[(.*?)\](.*UK:\[.*?\])?.*?' + if is_eng else + r'.*?'), data) + if match is None: + raise DictLookupError('failed to parse html.') + + phonetic = match.group(1) if is_eng else '' + result.append('{}{}'.format(word, ' /{}/'.format(phonetic) if phonetic else '')) + definition = match.group(3 if is_eng else 1) + items = definition.replace(' ', '').replace('•', '').split('
') + part = '' + for item in items: + match = re.match(r'^([a-z]+\.)$', item) + if match: + part = match.group(1) + continue + result.append('{} {}'.format(part, item)) + else: + # no need to use BeautifulSoup, just extract definition from meta tag + match = re.search(r'', data) + if match is None: + raise DictLookupError('failed to find meta tag.') + description = match.group(1) + + match = re.match(r'^必应词典为您提供.*的释义,{},(.*); 网络释义: .*; $'.format( + r'美\[(.*?)\](,英\[.*?\])?' + if is_eng else + r'拼音\[(.*)\]'), description) + if match: + phonetic = match.group(1) + result.append('{}{}'.format(word, ' /{}/'.format(phonetic) if phonetic else '')) + items = match.group(3 if is_eng else 2).split('; ') + if is_eng: + for item in items: + if item != '': + result.append(item) + else: + for item in items: + match = re.match(r'([a-z]+\.) (.+)', item) + if match: + part = match.group(1) + for new_item in match.group(2).split('; '): + result.append('{} {}'.format(part, new_item)) + return result + + +def extract(word, item): + if not is_english(word): + match = re.match(r'[a-z]+\. 
(.+)', item) + if match: + return match.group(1) + + +def get_url(word): + params = {'q': word} + return '{}?{}'.format('http://www.bing.com/dict/search', urllib.urlencode(params)) diff --git a/Dict - Lookup Word/cndict/bingdict.py b/Dict - Lookup Word/cndict/bingdict.py deleted file mode 100644 index 5a94d1e..0000000 --- a/Dict - Lookup Word/cndict/bingdict.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import urllib -import urllib2 -import gzip -import re -import StringIO -from utils import * - - -def lookup(word): - params = {'q': word} - url = '{}?{}'.format('http://www.bing.com/dict/search', urllib.urlencode(params)) - try: - request = urllib2.Request(url) - request.add_header('Accept-Encoding', 'gzip') - response = urllib2.urlopen(request) - data = response.read() - except: - raise DictLookupError('error to fetch data.') - - if response.info().get('Content-Encoding') == 'gzip': - gzip_file = gzip.GzipFile(fileobj=StringIO.StringIO(data)) - data = gzip_file.read() - - # no need to use BeautifulSoup, just extract definition from meta tag - match = re.search(r'', data) - if match is None: - raise DictLookupError('failed to find meta tag.') - description = match.group(1) - - result = [] - is_eng = is_english(word) - match = re.match(r'^必应词典为您提供.*的释义,{},(.*); 网络释义: .*; $'.format( - r'美\[(.*?)\](,英\[.*\])?' - if is_eng else - r'拼音\[(.*)\]'), description) - if match: - phonetic = match.group(1) - result.append('{}{}'.format(word, ' /{}/'.format(phonetic) if phonetic else '')) - items = match.group(3 if is_eng else 2).split('; ') - if is_eng: - for item in items: - if item != '': - result.append(item) - else: - for item in items: - match = re.match(r'([a-z]+\.) (.+)', item) - if match: - part = match.group(1) - for new_item in match.group(2).split('; '): - result.append('{} {}'.format(part, new_item)) - return result - - -def extract(word, item): - if not is_english(word): - match = re.match(r'[a-z]+\. 
(.+)', item) - if match: - return match.group(1) - - -def get_url(word): - params = {'q': word} - return '{}?{}'.format('http://www.bing.com/dict/search', urllib.urlencode(params)) diff --git a/Dict - Lookup Word/cndict/dictcn.py b/Dict - Lookup Word/cndict/dictcn.py new file mode 100644 index 0000000..3370cb4 --- /dev/null +++ b/Dict - Lookup Word/cndict/dictcn.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import urllib +import urllib2 +import gzip +import re +import StringIO +from utils import * + + +def lookup(word, wap_page=False, *args): + if wap_page: + params = {'q': word} + url = '{}?{}'.format('http://3g.dict.cn/s.php', urllib.urlencode(params)) + else: + url = 'http://dict.cn/{}'.format(urllib.quote(word)) + try: + request = urllib2.Request(url) + request.add_header('Accept-Encoding', 'gzip') + response = urllib2.urlopen(request) + data = response.read() + except: + raise DictLookupError('error to fetch data.') + + if response.info().get('Content-Encoding') == 'gzip': + gzip_file = gzip.GzipFile(fileobj=StringIO.StringIO(data)) + data = gzip_file.read() + + result = [] + is_eng = is_english(word) + + if wap_page: + match = re.search(r'

.*?

\s*
{}
{}'.format( + r'\s*\[.*?\].*?\s*\[(.*?)\].*?\s*' + if is_eng else + r'\[(.*?)\]', + r'\s*
(.*?)
' + if is_eng else + r''), data, re.S) + if match is None: + raise DictLookupError('failed to parse html.') + + phonetic = match.group(1) + result.append('{}{}'.format(word, ' /{}/'.format(phonetic) if phonetic else '')) + if is_eng: + definition = match.group(2) + for match in re.finditer(r'(.*?)(.*?)
', definition): + result.append('{} {}'.format(match.group(1), match.group(2))) + else: + match = re.search(r'
{}
'.format( + r'\s*\s*英 .*?\[.*?\].*?\s*\s*美 .*?\[(.*?)\].*?\s*' + if is_eng else + r'\s*\[(.*?)\]\s*'), data, re.S) + phonetic = match.group(1) if match else '' + result.append('{}{}'.format(word, ' /{}/'.format(phonetic) if phonetic else '')) + + match = re.search(r'
(.*?)
'.format( + r'detail' if is_eng else r'cn'), data, re.S) + if match is None: + raise DictLookupError('failed to parse html.') + + definition = match.group(1) + if is_eng: + for match in re.finditer(r'(.*?).*?\s*
    (.*?)
', definition, re.S): + part = match.group(1) + for item in re.finditer(r'
  • (.*?)
  • ', match.group(2)): + result.append('{} {}'.format(part, item.group(1))) + else: + match = re.search(r'
      (.*?)
    ', definition, re.S) + if match: + for item in re.finditer(r'
  • (.*?)
  • ', match.group(1)): + result.append(item.group(1)) + return result + + +def extract(word, item): + if not is_english(word): + match = re.match(r'[a-z]+\. (.+)', item) + if match: + return match.group(1) + + +def get_url(word): + return 'http://dict.cn/{}'.format(urllib.quote(word)) diff --git a/Dict - Lookup Word/cndict/icibadict.py b/Dict - Lookup Word/cndict/iciba.py similarity index 98% rename from Dict - Lookup Word/cndict/icibadict.py rename to Dict - Lookup Word/cndict/iciba.py index aa19faf..68be1fb 100644 --- a/Dict - Lookup Word/cndict/icibadict.py +++ b/Dict - Lookup Word/cndict/iciba.py @@ -6,7 +6,7 @@ from utils import * -def lookup(word): +def lookup(word, *args): params = { 'key': 'E93A321FB1995DF5EC118B51ABAF8DC7', 'type': 'json', diff --git a/Dict - Lookup Word/cndict/landau.py b/Dict - Lookup Word/cndict/landau.py new file mode 100644 index 0000000..6b16db7 --- /dev/null +++ b/Dict - Lookup Word/cndict/landau.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os +import re +import urllib +import subprocess +from utils import * + + +def lookup(word, *args): + cmd = '{}/{}'.format(os.path.dirname(os.path.realpath(__file__)), 'systemdict') + if os.path.isfile(cmd) and os.access(cmd, os.X_OK): + dict_file = 'langdao-ec-gb.dictionary' if is_english(word) else 'langdao-ce-gb.dictionary' + dict_path = os.path.expanduser('~/Library/Dictionaries/{}'.format(dict_file)) + proc = subprocess.Popen([cmd, dict_path, word], stdout=subprocess.PIPE) + definition = proc.stdout.read() + if definition == '(null)\n': + return [] + definition = definition.decode('utf-8') + else: + raise DictLookupError('file {} not found or not executable.'.format(cmd)) + + result = [] + definition = definition.encode('utf-8').split('\n相关词组:\n')[0] + result = definition.split('\n') + if is_english(word): + if result[1].startswith('*['): + phonetic = result[1][2:-1] + result[0:2] = ['{} {}'.format(word, '/{}/'.format(phonetic) if phonetic else '')] + else: + 
result[1:2] = result[1].split('; ') + return result + + +def extract(word, item): + if not is_english(word): + match = re.match(r'(【.+】 )?(.+)', item) + if match: + return match.group(2) + + +def get_url(word): + return 'dict://' + urllib.quote(word) diff --git a/Dict - Lookup Word/cndict/oxford.py b/Dict - Lookup Word/cndict/oxford.py new file mode 100644 index 0000000..aece190 --- /dev/null +++ b/Dict - Lookup Word/cndict/oxford.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os +import re +import urllib +import subprocess +from utils import * + + +def lookup(word, external_cmd=True, *args): + if external_cmd: + cmd = '{}/{}'.format(os.path.dirname(os.path.realpath(__file__)), 'systemdict') + if os.path.isfile(cmd) and os.access(cmd, os.X_OK): + dict_path = '/Library/Dictionaries/Simplified Chinese - English.dictionary' + proc = subprocess.Popen([cmd, dict_path, word], stdout=subprocess.PIPE) + definition = proc.stdout.read() + if definition == '(null)\n': + return [] + definition = definition.decode('utf-8') + else: + raise DictLookupError('file {} not found or not executable.'.format(cmd)) + else: + from DictionaryServices import DCSCopyTextDefinition + unicode_word = word.decode('utf-8') + word_range = (0, len(unicode_word)) + definition = DCSCopyTextDefinition(None, unicode_word, word_range) + if definition is None: + return [] + + result = [] + is_eng = is_english(word) + + number = u'①-⑳㉑-㉟㊱-㊿' + chinese = ur'\u4e00-\u9fa5' + pinyin = u'āáǎàēéěèōóǒòīíǐìūúǔùüǘǚǜńň' + phrase = r"a-zA-Z,\. 
" + sentence = ur"0-9a-zA-Z'‘’«»£\$/\?!,\.\[\]\(\) " + pinyin_all = u"a-zA-Z{}'… ".format(pinyin) + sentence_full = ur'([{1}][{0}]*[{1}]|\([{0}]*[{1}]|[{1}][{0}]*\)) ?[{2}]+'.format( + sentence, sentence.replace(r'\(\) ', ''), chinese) + + part_map = { + 'noun': 'n.', + 'intransitive verb': 'vi.', + 'transitive verb': 'vt.', + 'adjective': 'adj.', + 'adverb': 'adv.', + 'determiner': 'det.', + 'pronoun': 'pron.', + 'preposition': 'prep.', + 'conjunction': 'conj.', + 'exclamation': 'excl.', + 'abbreviation': 'abbr.', + 'noun plural': 'pl.', + 'modifier': 'mod.' + } if is_eng else { + u'名词': u'n.', + u'动词': u'v.', + u'形容词': u'adj.', + u'副词': u'adv.', + u'数词': u'num.', + u'代词': u'pron.', + u'介词': u'prep.', + u'连词': u'conj.', + u'叹词': u'excl.' + } + + ignore_list = [ + 'Countable and uncountable', 'Uncountable and countable', + 'Countable', 'Uncountable', 'British', 'American', + 'colloquial', 'euphemistic', 'dated', 'Linguistics' + ] if is_eng else [ + u'方言', u'客套话', u'委婉语', u'书面语', u'俗语', u'比喻义', u'口语', u'惯用语' + ] + + phrase_mode = False + if is_eng: + word_escaped = re.escape(word) + if not re.match(word_escaped + '(?= )', definition, re.I): + verb_escaped = re.escape(word.split(' ')[0]) + if not re.match(verb_escaped + '(?= )', definition, re.I): + return result + phrase_mode = True + pos = definition.find('PHRASAL VERB') + if phrase_mode: + if pos == -1: + return result + definition = definition[pos:] + match = re.search(r'(({0}:? )([A-Z]\. )?({1}).*?)(?=\b{2} [{3}]*?:? ([A-Z]\. )?({1}))'.format( + word_escaped, '|'.join(part_map.keys()), verb_escaped, phrase), definition) + if match is None: + return result + definition = match.group(1) + start_pos = len(match.group(2)) + else: + if pos != -1: + definition = definition[:pos] + + if phrase_mode: + result.append(word) + else: + trimmed_len = 0 + single_phonetic = True + if is_eng: + phonetics = [] + for match in re.finditer(r'[A-Z]\. 
\|(.*?)\| ?', definition): + phonetic = match.group(1).encode('utf-8').strip() + phonetic = '/{}/'.format(phonetic) + if phonetic not in phonetics: + phonetics.append(phonetic) + start = match.start() + 3 - trimmed_len + end = match.end() - trimmed_len + definition = definition[:start] + definition[end:] + trimmed_len += end - start + if len(phonetics) > 0: + phonetics = ', '.join(phonetics) + result.append('{} {}'.format(word, phonetics)) + single_phonetic = False + if single_phonetic: + match = re.search(r'\|(.*?)\| ?' + if is_eng else + ur'([^ ]*[{}][^ ]*) ?'.format(pinyin), + definition) + if match is None: + return result + phonetic = match.group(1).encode('utf-8').strip() + result.append('{}{}'.format(word, ' /{}/'.format(phonetic) if phonetic else '')) + start_pos = match.span()[1] + + part_list = [] + pattern = (r'({}) ?(\(.*?\))? ?'.format('|'.join(part_map.keys())) + if is_eng else + ur'({}) '.format('|'.join(part_map.keys()))) + + if 'A. ' not in definition: + match = re.match(pattern, definition[start_pos:]) + if match: + part_list.append((start_pos, start_pos + match.span()[1], part_map[match.group(1)])) + else: + for match in re.finditer(ur'[A-Z]\. {}'.format(pattern), definition): + part_list.append((match.start(), match.end(), part_map[match.group(1)])) + + last_start_pos = len(definition) + pattern = (ur"([^{4}]*?([{0}][{1}]*? 
|[{2}]*?(\) |›)))(?=({3}|[{4}]|$))".format(pinyin, pinyin_all, phrase, sentence_full, number) + if is_eng else + ur"(?![a-z] )([^{2}]*?[{0}]* )(?=([→{1}{2}]|$))".format(phrase, chinese, number)) + + for part in reversed(part_list): + entry_list = [] + text = definition[part[1]:last_start_pos] + if u'① ' not in text: + match = re.match(pattern, text) + if match: + entry_list.append(match.group(1)) + else: + for match in re.finditer(ur'[{}] {}'.format(number, pattern), text): + entry_list.append(match.group(1)) + + pos = 1 + for entry in entry_list: + entry = re.sub(ur'[{0}]*[{1}][{0}]*'.format(pinyin_all, pinyin) + if is_eng else + r'\[used .*?\]', + '', entry) + entry = re.sub(ur'({})'.format('|'.join(ignore_list)), '', entry) + entry = re.sub(r'\([ /]*\)', '', entry) + entry = re.sub(r' {2,}', ' ', entry).strip() + if is_eng: + entry = entry.replace(u' ;', u';') + entry = (u'{} {}'.format(part[2], entry)).encode('utf-8') + result.insert(pos, entry) + pos += 1 + + last_start_pos = part[0] + + return result + + +def extract(word, item): + if not is_english(word): + match = re.match(r'[a-z]+\. 
((.+)|[.+])?(.+)', item) + if match: + return match.group(2) + + +def get_url(word): + return 'dict://' + urllib.quote(word) diff --git a/Dict - Lookup Word/cndict/systemdict.py b/Dict - Lookup Word/cndict/systemdict.py deleted file mode 100644 index 96d0c6a..0000000 --- a/Dict - Lookup Word/cndict/systemdict.py +++ /dev/null @@ -1,214 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import os -import re -import urllib -import subprocess -from utils import * - -DEFAULT_CMD = '{}/{}'.format(os.path.dirname(os.path.realpath(__file__)), 'systemdict') -DEFAULT_DICT_NAME = 'landau' # or 'oxford' - - -def lookup(word, external_cmd=True, cmd=DEFAULT_CMD, dict_name=DEFAULT_DICT_NAME): - if external_cmd: - if os.path.isfile(cmd) and os.access(cmd, os.X_OK): - if dict_name == 'oxford': - dict_path = '/Library/Dictionaries/Simplified Chinese - English.dictionary' - elif dict_name == 'landau': - dict_file = 'langdao-ec-gb.dictionary' if is_english(word) else 'langdao-ce-gb.dictionary' - dict_path = os.path.expanduser('~/Library/Dictionaries/{}'.format(dict_file)) - else: - raise DictLookupError('dict name not valid.') - proc = subprocess.Popen([cmd, dict_path, word], stdout=subprocess.PIPE) - definition = proc.stdout.read() - if definition == '(null)\n': - return [] - definition = definition.decode('utf-8') - else: - raise DictLookupError('file {} not found or not executable.'.format(cmd)) - else: - from DictionaryServices import DCSCopyTextDefinition - unicode_word = word.decode('utf-8') - word_range = (0, len(unicode_word)) - definition = DCSCopyTextDefinition(None, unicode_word, word_range) - if definition is None: - return [] - - result = [] - if dict_name == 'oxford': - is_eng = is_english(word) - - number = u'①-⑳㉑-㉟㊱-㊿' - chinese = ur'\u4e00-\u9fa5' - pinyin = u'āáǎàēéěèōóǒòīíǐìūúǔùüǘǚǜńň' - phrase = r"a-zA-Z,\. 
" - sentence = ur"0-9a-zA-Z'‘’«»£\$/\?!,\.\[\]\(\) " - pinyin_all = u"a-zA-Z{}'… ".format(pinyin) - sentence_full = ur'([{1}][{0}]*[{1}]|\([{0}]*[{1}]|[{1}][{0}]*\)) ?[{2}]+'.format( - sentence, sentence.replace(r'\(\) ', ''), chinese) - - part_map = { - 'noun': 'n.', - 'intransitive verb': 'vi.', - 'transitive verb': 'vt.', - 'adjective': 'adj.', - 'adverb': 'adv.', - 'determiner': 'det.', - 'pronoun': 'pron.', - 'preposition': 'prep.', - 'conjunction': 'conj.', - 'exclamation': 'excl.', - 'abbreviation': 'abbr.', - 'noun plural': 'pl.', - 'modifier': 'mod.' - } if is_eng else { - u'名词': u'n.', - u'动词': u'v.', - u'形容词': u'adj.', - u'副词': u'adv.', - u'数词': u'num.', - u'代词': u'pron.', - u'介词': u'prep.', - u'连词': u'conj.', - u'叹词': u'excl.' - } - - ignore_list = [ - 'Countable and uncountable', 'Uncountable and countable', - 'Countable', 'Uncountable', 'British', 'American', - 'colloquial', 'euphemistic', 'dated', 'Linguistics' - ] if is_eng else [ - u'方言', u'客套话', u'委婉语', u'书面语', u'俗语', u'比喻义', u'口语', u'惯用语' - ] - - phrase_mode = False - if is_eng: - word_escaped = re.escape(word) - if not re.match(word_escaped + '(?= )', definition, re.I): - verb_escaped = re.escape(word.split(' ')[0]) - if not re.match(verb_escaped + '(?= )', definition, re.I): - return result - phrase_mode = True - pos = definition.find('PHRASAL VERB') - if phrase_mode: - if pos == -1: - return result - definition = definition[pos:] - match = re.search(r'(({0}:? )([A-Z]\. )?({1}).*?)(?=\b{2} [{3}]*?:? ([A-Z]\. )?({1}))'.format( - word_escaped, '|'.join(part_map.keys()), verb_escaped, phrase), definition) - if match is None: - return result - definition = match.group(1) - start_pos = len(match.group(2)) - else: - if pos != -1: - definition = definition[:pos] - - if phrase_mode: - result.append(word) - else: - trimmed_len = 0 - single_phonetic = True - if is_eng: - phonetics = [] - for match in re.finditer(r'[A-Z]\. 
\|(.*?)\| ?', definition): - phonetic = match.group(1).encode('utf-8').strip() - phonetic = '/{}/'.format(phonetic) - if phonetic not in phonetics: - phonetics.append(phonetic) - start = match.start() + 3 - trimmed_len - end = match.end() - trimmed_len - definition = definition[:start] + definition[end:] - trimmed_len += end - start - if len(phonetics) > 0: - phonetics = ', '.join(phonetics) - result.append('{} {}'.format(word, phonetics)) - single_phonetic = False - if single_phonetic: - match = re.search(r'\|(.*?)\| ?' - if is_eng else - ur'([^ ]*[{}][^ ]*) ?'.format(pinyin), - definition) - if match is None: - return result - phonetic = match.group(1).encode('utf-8').strip() - result.append('{}{}'.format(word, ' /{}/'.format(phonetic) if phonetic else '')) - start_pos = match.span()[1] - - part_list = [] - pattern = (r'({}) ?(\(.*?\))? ?'.format('|'.join(part_map.keys())) - if is_eng else - ur'({}) '.format('|'.join(part_map.keys()))) - - if 'A. ' not in definition: - match = re.match(pattern, definition[start_pos:]) - if match: - part_list.append((start_pos, start_pos + match.span()[1], part_map[match.group(1)])) - else: - for match in re.finditer(ur'[A-Z]\. {}'.format(pattern), definition): - part_list.append((match.start(), match.end(), part_map[match.group(1)])) - - last_start_pos = len(definition) - pattern = (ur"([^{4}]*?([{0}][{1}]*? 
|[{2}]*?(\) |›)))(?=({3}|[{4}]|$))".format(pinyin, pinyin_all, phrase, sentence_full, number) - if is_eng else - ur"(?![a-z] )([^{2}]*?[{0}]* )(?=([→{1}{2}]|$))".format(phrase, chinese, number)) - - for part in reversed(part_list): - entry_list = [] - text = definition[part[1]:last_start_pos] - if u'① ' not in text: - match = re.match(pattern, text) - if match: - entry_list.append(match.group(1)) - else: - for match in re.finditer(ur'[{}] {}'.format(number, pattern), text): - entry_list.append(match.group(1)) - - pos = 1 - for entry in entry_list: - entry = re.sub(ur'[{0}]*[{1}][{0}]*'.format(pinyin_all, pinyin) - if is_eng else - r'\[used .*?\]', - '', entry) - entry = re.sub(ur'({})'.format('|'.join(ignore_list)), '', entry) - entry = re.sub(r'\([ /]*\)', '', entry) - entry = re.sub(r' {2,}', ' ', entry).strip() - if is_eng: - entry = entry.replace(u' ;', u';') - entry = (u'{} {}'.format(part[2], entry)).encode('utf-8') - result.insert(pos, entry) - pos += 1 - - last_start_pos = part[0] - - elif dict_name == 'landau': - definition = definition.encode('utf-8').split('\n相关词组:\n')[0] - result = definition.split('\n') - if is_english(word): - if result[1].startswith('*['): - phonetic = result[1][2:-1] - result[0:2] = ['{} {}'.format(word, '/{}/'.format(phonetic) if phonetic else '')] - else: - result[1:2] = result[1].split('; ') - else: - raise DictLookupError('dict name not valid.') - - return result - - -def extract(word, item, dict_name=DEFAULT_DICT_NAME): - if not is_english(word): - if dict_name == 'oxford': - match = re.match(r'[a-z]+\. 
((.+)|[.+])?(.+)', item) - if match: - return match.group(2) - elif dict_name == 'landau': - match = re.match(r'(【.+】 )?(.+)', item) - if match: - return match.group(2) - - -def get_url(word): - return 'dict://' + urllib.quote(word) diff --git a/Dict - Lookup Word/cndict/youdaodict.py b/Dict - Lookup Word/cndict/youdao.py similarity index 98% rename from Dict - Lookup Word/cndict/youdaodict.py rename to Dict - Lookup Word/cndict/youdao.py index 3761d00..32c8fe1 100644 --- a/Dict - Lookup Word/cndict/youdaodict.py +++ b/Dict - Lookup Word/cndict/youdao.py @@ -6,7 +6,7 @@ from utils import * -def lookup(word): +def lookup(word, *args): params = { 'keyfrom': 'awf-Chinese-Dict', 'key': '19965805', diff --git a/Dict - Lookup Word/config.json b/Dict - Lookup Word/config.json new file mode 100644 index 0000000..ed7cfb4 --- /dev/null +++ b/Dict - Lookup Word/config.json @@ -0,0 +1,59 @@ +{ + // keyword to trigger this workflow, `cc` i.e. `查词` + // ** run internal command 'update' to take effect ** + "keyword": "cc", + + // default dictionary, available options are: + // - `nj` or `oxford`: system oxford dictionary + // - `ld` or `landau`: landau dictionary, not built in, please download and install as instructed + // - `cb` or `iciba`: iciba online dictionary + // - `bd` or `baidu`: baidu online dictionary + // - `by` or `bing`: bing online dictionary, may be slower due to absence of api + // - `yd` or `youdao`: youdao online dictionary + // - `hc` or `dictcn`: dict.cn online dictionary, may be slower due to absence of api + "default": "nj", + + // keymap for user actions + // + none: default action when you press enter, available options are + // - `open`: open Dictionary.app for `nj` and `ld` or open url in browser for other dictionaries + // - `say`: pronounce word via system tts engine + // + ctrl/alt/shift/cmd/fn: dictionary to switch when press enter with these modifier keys + // ** run internal command 'update' to take effect ** + "keymap": + { + "none": "open", + 
"ctrl": "cb", + "alt": "yd", + "shift": "bd", + "cmd": "nj", + "fn": "by" + }, + + // dictionary options + // these options will be passed to `lookup` function as argument + // ** don't change these options unless you are aware of what you are doing ** + "options": + { + "oxford": + { + "external_cmd": true + }, + "dictcn": + { + "wap_page": false + }, + "bing": + { + "wap_page": true + } + }, + + // cache settings + // + enable: enable cache or not + // + expire: cache expire time in hours + "cache": + { + "enable": true, + "expire": 24 + } +} diff --git a/Dict - Lookup Word/info.plist b/Dict - Lookup Word/info.plist index 7a0891e..d2270b7 100644 --- a/Dict - Lookup Word/info.plist +++ b/Dict - Lookup Word/info.plist @@ -68,7 +68,7 @@ modifiers 524288 modifiersubtext - Lookup this word youdao dict. + Lookup this word in youdao dictionary. destinationuid @@ -76,7 +76,7 @@ modifiers 262144 modifiersubtext - Lookup this word iciba dict. + Lookup this word in iciba dictionary. destinationuid @@ -84,7 +84,7 @@ modifiers 131072 modifiersubtext - Lookup this word in baidu dict. + Lookup this word in baidu dictionary. destinationuid @@ -92,7 +92,7 @@ modifiers 1048576 modifiersubtext - Lookup this word in system dict. + Lookup this word in oxford dictionary. destinationuid @@ -100,7 +100,7 @@ modifiers 8388608 modifiersubtext - Lookup this word in bing dict. + Lookup this word in bing dictionary. CF3B3202-CBC6-459A-80F8-4BD2D77EBAC6 @@ -140,7 +140,7 @@ createdby liberize description - Lookup word in system dict, youdao dict, iciba dict, baidu dict or bing dict. + Lookup word in multiple dictionaries. disabled name @@ -264,7 +264,7 @@ escaping 100 keyword - dict + cc queuedelaycustom 1 queuedelayimmediatelyinitially @@ -278,7 +278,7 @@ script python ./query.py "{query}" subtext - Lookup word in system dict, youdao dict, iciba dict, baidu dict or bing dict. + Lookup word in multiple dictionaries. 
title Dict - Lookup Word type @@ -320,7 +320,7 @@ escaping 100 script - python ./actions.py "{query} @ sys" + python ./actions.py "{query} @ nj" type 0 @@ -358,47 +358,47 @@ 1CBAA26B-7D1D-47B5-A997-170596301266 ypos - 170 + 170.0 3A28134B-75EF-4C8E-9578-8D28B5AAC7F5 ypos - 350 + 350.0 3E2DFED0-0EEB-4957-ADB0-A804CBBE3747 ypos - 650 + 650.0 89A1C4A1-2813-4CEC-B312-B728E54B5D59 ypos - 530 + 530.0 9A8F49BF-1304-4AC1-BA87-2BCF23C6D9C3 ypos - 410 + 410.0 C6E96055-A9FC-4B3E-BE9B-8D9B6B7ED15F ypos - 350 + 350.0 CF3B3202-CBC6-459A-80F8-4BD2D77EBAC6 ypos - 290 + 290.0 D29C9083-5950-4076-AAE6-E64E87E69F0B ypos - 350 + 350.0 F8F4627F-6071-4FD3-9249-014727AED0C4 ypos - 50 + 50.0 webaddress diff --git a/Dict - Lookup Word/query.py b/Dict - Lookup Word/query.py index 1104c86..90f162d 100644 --- a/Dict - Lookup Word/query.py +++ b/Dict - Lookup Word/query.py @@ -5,115 +5,107 @@ import re import sys import time -import plistlib import cndict +import json + from cache import Cache from feedback import Feedback +from alfredplist import AlfredPlist + +def query(dictionary, word): + global config, dict_cache -def query(dictionary, word, dict_cache): - now = time.time() + enable_cache = config['cache']['enable'] if config else True + if enable_cache: + cache_expire = (config['cache']['expire'] if config else 24) * 3600 + now = time.time() - # dict_cache.set('last lookup time', now, float('inf')) - # time.sleep(1) - # if dict_cache.get('last lookup time') != now: - # return + # dict_cache.set('last lookup time', now, float('inf')) + # time.sleep(1) + # if dict_cache.get('last lookup time') != now: + # return - clean_time = dict_cache.get('last clean time') - if clean_time is None or now - clean_time > 3600 * 24: - dict_cache.set('last clean time', now, float('inf')) - dict_cache.clean_expired() + clean_time = dict_cache.get('last clean time') + if clean_time is None or now - clean_time > cache_expire: + dict_cache.set('last clean time', now, float('inf')) + dict_cache.clean_expired() - 
cache_name = '{}@{}'.format(word, dictionary) - cache = dict_cache.get(cache_name) - if cache: - return cache + cache_name = '{}@{}'.format(word, dictionary) + cache = dict_cache.get(cache_name) + if cache: + return cache - result = cndict.lookup(dictionary, word) + options = config['options'] if config else {} + dict_name = cndict.get_full_name(dictionary) + options = options.get(dict_name, {}) + + result = cndict.lookup(dictionary, word, **options) if result: result = [item.decode('utf-8') for item in result] - dict_cache.set(cache_name, result, 3600 * 24) + if enable_cache: + dict_cache.set(cache_name, result, cache_expire) return result feedback = Feedback() +plist = AlfredPlist() +plist.read(os.path.abspath('./info.plist')) +base_dir = os.path.expanduser('~/Library/Caches/com.runningwithcrayons.Alfred-2/Workflow Data/') +dict_cache = Cache(os.path.join(base_dir, plist.get_bundleid())) + +try: + config_data = open(os.path.abspath('./config.json')).read() + config = json.loads(re.sub(r'//.*', '', config_data)) +except: + config = {} + sys.argv = [arg for arg in sys.argv if arg != ''] argc = len(sys.argv) if argc == 1: feedback.add_item(title=u'Dict - Lookup Word', - subtitle=u'Format: "word @ dict". Available dicts are "sys", "yd", "cb", "bd", "by".', + subtitle=u'Format: "word @ dict". 
Available dicts are "nj", "ld", "yd", "cb", "bd", "by", "hc".', valid=False) elif argc == 2: arg = sys.argv[1] pos = arg.rfind('@') if pos == -1: word = arg.strip() - dictionary = 'sys' + dictionary = config['default'] if config else 'nj' else: word = arg[:pos].strip() dictionary = arg[pos+1:].strip() if dictionary == '': - dictionary = 'sys' - plist = plistlib.readPlist(os.path.abspath('./info.plist')) - bundle_id = plist['bundleid'].strip() - base_dir = os.path.expanduser('~/Library/Caches/com.runningwithcrayons.Alfred-2/Workflow Data/') - dict_cache = Cache(os.path.join(base_dir, bundle_id)) - - internal_cmds = { - 'clean': 'Clean cache', - 'sysdict ': 'Set system dictionary to "oxford" or "landau"', - 'defact ': 'Set default action to "view full definition" or "pronounce word"', - } + dictionary = config['default'] if config else 'nj' - if word.startswith(':'): - cmd = word.lstrip(':').split(' ') - if cmd[0] == '': - feedback.add_item(title='Internal commands', valid=False) - for cmdl, desc in internal_cmds.iteritems(): - feedback.add_item(title=cmdl, subtitle=desc, - arg=':{} '.format(cmdl.split(' ')[0]), valid=True) - else: - success = False - if cmd[0] == u'clean': - dict_cache.clean() - success = True - elif cmd[0] == u'sysdict': - if len(cmd) == 2: - system_dict = {'o': 'oxford', 'l': 'landau'}.get(cmd[1], None) - if system_dict: - content = open('cndict/systemdict.py').read() - content = re.sub(r'(?<=DEFAULT_DICT_NAME = ).*', "'{}'".format(system_dict), content) - open('cndict/systemdict.py', 'w').write(content) - success = True - elif cmd[0] == u'defact': - if len(cmd) == 2: - default_action = {'v': 'open', 'p': 'say'}.get(cmd[1], None) - if default_action: - content = open('./info.plist').read() - content = re.sub(r'\b(open|say)\b', default_action, content) - open('./info.plist', 'w').write(content) - success = True - if success: - feedback.add_item(title='Command executed successfully', - subtitle=u'Press "↩" to return.', - arg=':', valid=True) - else: 
- feedback.add_item(title='Invalid command', - subtitle=u'Press "↩" to view available internal commands.', - arg=':', valid=True) + if word == ':': + internal_cmds = { + 'clean': 'Clean cache', + 'config': 'Edit config file', + 'update': 'Update config' + } + feedback.add_item(title='Internal commands', + subtitle=u'Press "↩" to execute selected internal command', + valid=False) + for cmd, desc in internal_cmds.iteritems(): + feedback.add_item(title=cmd, subtitle=desc, + arg=':{}'.format(cmd), valid=True) else: try: - result = query(dictionary, word, dict_cache) + result = query(dictionary, word) arg = u'{} @ {}'.format(word.decode('utf-8'), dictionary.decode('utf-8')) if result: + action = config['keymap']['none'] if config else 'open' feedback.add_item(title=result[0], - subtitle=u'Press "↩" to view full definition or "⌘/⌥/⌃/⇧/fn + ↩" to lookup word in other dicts.', + subtitle=u'Press "↩" to {} or "⌘/⌥/⌃/⇧/fn + ↩" to lookup word in other dicts.'.format( + 'view full definition' if action == 'open' else 'pronounce word'), arg=arg, valid=True) for item in result[1:]: feedback.add_item(title=item, arg=u'{} | {}'.format(arg, item), valid=True) else: feedback.add_item(title='Dict - Lookup Word', - subtitle=u'Word "{}" doesn\'t exist in dict "{}".'.format(word.decode('utf-8'), dictionary.decode('utf-8')), + subtitle=u'Word "{}" doesn\'t exist in dict "{}".'.format( + word.decode('utf-8'), dictionary.decode('utf-8')), arg=arg, valid=True) except cndict.DictLookupError, e: feedback.add_item(title=word, subtitle='Error: {}'.format(e), valid=False) diff --git a/README.md b/README.md index c8adbdb..510d3e6 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ * 爱词霸在线词典 * 百度在线词典 * 必应在线词典 + * 海词在线词典 * 支持英汉、汉英互查 * 支持音标,默认显示美式音标 * 使用系统 TTS 引擎发音 @@ -17,6 +18,7 @@ * 可使用快捷键进行词典切换 * 缓存查询结果,方便下次查询 * 支持快捷键取词 +* 支持自定义配置 ## 截图 @@ -24,45 +26,75 @@ ## 查词用法 -关键词为 `dict`,默认使用系统词典,使用以下格式指定词典: +查词: - dict {word} @ {dict} + cc {word} @ {dict} -词典代号及切换快捷键: +词典代号: -词典 | 代号 | 快捷键 --------- | 
----------- | --------- -系统 | sys, system | `⌘` `↩` -有道 | yd, youdao | `⌥` `↩` -爱词霸 | cb, iciba | `⌃` `↩` -百度 | bd, baidu | `⇧` `↩` -必应 | by, bing | `fn` `↩` +词典 | 代号 +------------------ | ----------- +系统内置牛津词典 | nj, oxford +朗道本地词典(需下载) | ld, landau +有道在线词典 | yd, youdao +爱词霸在线词典 | cb, iciba +百度在线词典 | bd, baidu +必应在线词典 | by, bing +海词在线词典 | hc,dictcn + +注: + +* 关键词默认为 cc(即查词),可通过配置文件修改。 +* 每个词典有长短两个代号,短的代号为拼音缩写,便于记忆,长的代号为全称。 +* 可以为每个词典启用一个切换快捷键,`⌘`/`⌥`/`⌃`/`⇧`/`fn` + `↩`,可通过配置文件修改。 +* 朗道词典非系统内置,请先[下载](http://pan.baidu.com/s/1qWx4mV6),然后复制到 `~/Library/Dictionaries/` 目录。 +* 由于必应、海词没有提供 API,只能通过解析 HTML 得到,因此速度可能稍慢(已优化)。 ## 内部命令 -使用以下格式执行内部命令: +查看内部命令: + + cc : + +命令 | 功能 +------- | --------------------------------- +clean | 清除所有缓存 +config | 编辑配置文件(json 格式) +update | 修改配置文件的某些项后需要更新才能生效 - dict :{command} +建议每次修改配置文件后,执行一次 update,确保生效。 -命令 | 参数 | 功能 -------- | ------ | --------------------------------- -clean | 无 | 清除缓存 -sysdict | o 或 l | 设置系统词典,牛津(o)或朗道(l) -defact | v 或 p | 设置默认操作,查看完整释义(v)或发音(p) +## 配置文件 -例如,系统词典默认使用朗道词典,如果想切换为牛津词典,请输入 - - dict :sysdict o +配置文件为 json 格式,目前有以下选项: -修改或更新之后,如果想立即生效,请清空缓存: +* "keyword": 关键字,默认为 "cc"。 +* "default": 默认词典,即省略 `@ {dict}` 时使用的词典,默认为"nj"。 +* "keymap": 键绑定,修改切换词典快捷键,支持以下修饰键: + * "none": 直接回车时的行为,可取 "open" 或 "say": + - "open":打开详细解释页面(浏览器或系统词典)。 + - "say":发音,目前只支持系统 tts 引擎。 + * "ctrl/alt/shift/cmd/fn": 词典代号,长短皆可。 +* "options": 具体词典相关的一些选项,一般不用修改。 + * "oxford": 牛津词典选项: + * "external_cmd": 使用外部命令或 python 接口查词,默认为 "true"。 + * "dictcn": 海词词典选项: + * "wap_page": 是否使用 wap 页面查词,wap 页面速度可能快一些,但信息较少,默认为 "false"。 + * "bing": 必应词典选项: + * "wap_page": 是否使用 wap 页面查词,信息一样,但 wap 页面速度可能快一些,默认为 "true"。 +* "cache": 缓存相关的设置。 + * "enable": 打开或关闭缓存,默认为 "true"。 + * "expire": 缓存失效时间,以小时为单位,默认为 "24"。 - dict :clean +注: -## 其他 +* 配置文件里同样有较详细英文注释,请修改之前务必了解每个选项的作用。 +* "keyword" 和 "keymap" 这两个选项修改完之后执行 update 才能生效。 +* 多数情况下不用修改配置文件,默认即可。 -朗道词典非系统内置,请先[下载](http://pan.baidu.com/s/1qWx4mV6),然后复制到 `~/Library/Dictionaries/` 目录. 
+## LICENSE -由于必应词典没有提供 API,只能通过解析 HTML 得到,因此速度可能稍慢。 +GPL ## 联系我