From e27f14f59a4d1108e88562d48264d56312ea6ffc Mon Sep 17 00:00:00 2001 From: Abdullah Diab Date: Sat, 27 Jun 2020 21:56:05 +0200 Subject: [PATCH] v2.1.0 (#44) * initial work * fix CI * Work done * Fix README --- .gitignore | 1 + .travis.yml | 27 +++--- README.md | 47 ++++++++++- arabic_reshaper/__init__.py | 6 ++ arabic_reshaper/__version__.py | 2 +- arabic_reshaper/arabic_reshaper.py | 83 ++---------------- arabic_reshaper/ligatures.py | 12 ++- arabic_reshaper/reshaper_config.py | 131 +++++++++++++++++++++++++++++ meta.yaml | 18 ++-- setup.py | 9 +- tox.ini | 7 ++ 11 files changed, 241 insertions(+), 102 deletions(-) create mode 100644 arabic_reshaper/reshaper_config.py create mode 100644 tox.ini diff --git a/.gitignore b/.gitignore index d468ba1..752d6f9 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ wheels/ *.egg-info/ .installed.cfg *.egg +.tox/ # Installer logs pip-log.txt diff --git a/.travis.yml b/.travis.yml index e1a26af..eb85427 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,12 +1,19 @@ -# https://travis-ci.org/mpcabd/python-arabic-reshaper -dist: xenial language: python -python: - - "2.7" - - "3.5" - - "3.6" - - "3.7" install: - - "pip install -e ." -script: - - "python setup.py test" + - pip install tox + - pip install -e . +matrix: + include: + - python: 2.7 + env: + - TOX_ENV=py27 + - python: 3.6 + env: + - TOX_ENV=py36 + - python: 3.7 + env: + - TOX_ENV=py37 + - python: 3.8 + env: + - TOX_ENV=py38 +script: tox -e $TOX_ENV \ No newline at end of file diff --git a/README.md b/README.md index e29c78e..e9fa8e5 100644 --- a/README.md +++ b/README.md @@ -187,6 +187,45 @@ define an environment variable with the name to the configuration file. This way the reshape function will pick it automatically, and you won't have to change your old code. 
+## Settings based on a TrueType® font + +If you intend to render the text in a TrueType® font, you can tell the library +to generate its configuration by reading the font file to figure out what's +supported in the font and what's not. + +To use this feature you need to install the library with an extra option +(not necessary when you install it with conda): + + pip install --upgrade arabic-reshaper[with-fonttools] + +Then you can use the reshaper like this: + +```python +import arabic_reshaper + +reshaper = arabic_reshaper.ArabicReshaper( + arabic_reshaper.config_for_true_type_font( + '/path/to/true-type-font.ttf', + arabic_reshaper.ENABLE_ALL_LIGATURES + ) +) +``` + +This will parse the font file, figure out which ligatures it supports and enable them, +and detect whether the font lacks isolated forms, in which case `use_unshaped_instead_of_isolated` is +enabled. + +The second parameter to `config_for_true_type_font` can be one of the following flags, or several combined with `|`: + +- `ENABLE_NO_LIGATURES` +- `ENABLE_SENTENCES_LIGATURES` +- `ENABLE_WORDS_LIGATURES` +- `ENABLE_LETTERS_LIGATURES` +- `ENABLE_ALL_LIGATURES` (default) + +It controls which groups of ligatures to look for, depending on your usage; +see [default-config.ini](default-config.ini) for the list of available ligatures. + ## Tashkeel/Harakat issue [Harakat or Tashkeel](http://en.wikipedia.org/wiki/Arabic_diacritics#Tashkil_.28marks_used_as_phonetic_guides.29) @@ -209,6 +248,10 @@ https://github.com/mpcabd/python-arabic-reshaper/tarball/master ## Version History +### 2.1.0 + +* Added support for settings based on a TrueType® font + ### 2.0.14 * New option `use_unshaped_instead_of_isolated` to get around some fonts missing the isolated form for letters. @@ -290,8 +333,8 @@ to Python. 
## Contact Abdullah Diab (mpcabd) -Email: mpcabd@gmail.com -Blog: http://mpcabd.xyz +Email: mpcabd@gmail.com +Blog: http://mpcabd.xyz For more info visit my blog [post here](http://mpcabd.xyz/python-arabic-text-reshaper/) diff --git a/arabic_reshaper/__init__.py b/arabic_reshaper/__init__.py index 62cf57e..43cbf1c 100644 --- a/arabic_reshaper/__init__.py +++ b/arabic_reshaper/__init__.py @@ -1,6 +1,12 @@ import os from .arabic_reshaper import reshape, default_reshaper, ArabicReshaper +from .reshaper_config import (config_for_true_type_font, + ENABLE_NO_LIGATURES, + ENABLE_SENTENCES_LIGATURES, + ENABLE_WORDS_LIGATURES, + ENABLE_LETTERS_LIGATURES, + ENABLE_ALL_LIGATURES) exec(open(os.path.join(os.path.dirname(__file__), '__version__.py')).read()) diff --git a/arabic_reshaper/__version__.py b/arabic_reshaper/__version__.py index 897e313..a33997d 100644 --- a/arabic_reshaper/__version__.py +++ b/arabic_reshaper/__version__.py @@ -1 +1 @@ -__version__ = '2.0.15' +__version__ = '2.1.0' diff --git a/arabic_reshaper/arabic_reshaper.py b/arabic_reshaper/arabic_reshaper.py index a987081..4721a6a 100644 --- a/arabic_reshaper/arabic_reshaper.py +++ b/arabic_reshaper/arabic_reshaper.py @@ -7,30 +7,14 @@ # Email: mpcabd@gmail.com # Website: http://mpcabd.xyz -# Ported and tweaked from Java to Python, from Better Arabic Reshaper -# [https://github.com/agawish/Better-Arabic-Reshaper/] - -# Usage: -# Install python-bidi [https://github.com/MeirKriheli/python-bidi], can be -# installed from pip `pip install python-bidi`. - -# import arabic_reshaper -# from bidi.algorithm import get_display -# reshaped_text = arabic_reshaper.reshape('اللغة العربية رائعة') -# bidi_text = get_display(reshaped_text) -# Now you can pass `bidi_text` to any function that handles -# displaying/printing of the text, like writing it to PIL Image or passing it -# to a PDF generating method. 
from __future__ import unicode_literals import re -import os -from configparser import ConfigParser from itertools import repeat -from pkg_resources import resource_filename from .ligatures import LIGATURES +from .reshaper_config import auto_config from .letters import (UNSHAPED, ISOLATED, TATWEEL, ZWJ, LETTERS_ARABIC, LETTERS_ARABIC_V2, LETTERS_KURDISH, FINAL, INITIAL, MEDIAL, connects_with_letters_before_and_after, @@ -73,72 +57,19 @@ class ArabicReshaper(object): See the default configuration file :file:`default-config.ini` for details on how to configure your reshaper. """ + def __init__(self, configuration=None, configuration_file=None): super(ArabicReshaper, self).__init__() - configuration_files = [ - resource_filename(__name__, 'default-config.ini') - ] - - if not os.path.exists(configuration_files[0]): - raise Exception( - ('Default configuration file {} not found,' + - ' check the module installation.').format( - configuration_files[0], - ) - ) - - loaded_from_envvar = False - - if not configuration_file: - configuration_file = os.getenv( - 'PYTHON_ARABIC_RESHAPER_CONFIGURATION_FILE' - ) - if configuration_file: - loaded_from_envvar = True - - if configuration_file: - if not os.path.exists(configuration_file): - raise Exception( - 'Configuration file {} not found{}.'.format( - configuration_file, - loaded_from_envvar and ( - ' it is set in your environment variable ' + - 'PYTHON_ARABIC_RESHAPER_CONFIGURATION_FILE' - ) or '' - ) - ) - configuration_files.append(configuration_file) - - configuration_parser = ConfigParser() - configuration_parser.read( - configuration_files - ) - - if configuration: - configuration_parser.read_dict({ - 'ArabicReshaper': configuration - }) - - if 'ArabicReshaper' not in configuration_parser: - raise ValueError( - 'Invalid configuration: ' - 'A section with the name ArabicReshaper was not found' - ) - - configuration = configuration_parser['ArabicReshaper'] - self.configuration = configuration + self.configuration = 
auto_config(configuration, configuration_file) self.language = self.configuration.get('language') - if self.language == 'ArabicV2': self.letters = LETTERS_ARABIC_V2 elif self.language == 'Kurdish': self.letters = LETTERS_KURDISH else: self.letters = LETTERS_ARABIC - - @property def _ligatures_re(self): @@ -215,15 +146,15 @@ def reshape(self, text): previous_letter = output[-1] if previous_letter[FORM] == NOT_SUPPORTED: output.append((letter, isolated_form)) - elif not connects_with_letter_before(letter,self.letters): + elif not connects_with_letter_before(letter, self.letters): output.append((letter, isolated_form)) elif not connects_with_letter_after( - previous_letter[LETTER],self.letters): + previous_letter[LETTER], self.letters): output.append((letter, isolated_form)) elif (previous_letter[FORM] == FINAL and not connects_with_letters_before_and_after( - previous_letter[LETTER],self.letters - )): + previous_letter[LETTER], self.letters + )): output.append((letter, isolated_form)) elif previous_letter[FORM] == isolated_form: output[-1] = ( diff --git a/arabic_reshaper/ligatures.py b/arabic_reshaper/ligatures.py index ec515ab..64cc9ec 100644 --- a/arabic_reshaper/ligatures.py +++ b/arabic_reshaper/ligatures.py @@ -20,9 +20,9 @@ # This way we make sure we replace the longest ligatures first from __future__ import unicode_literals +from itertools import chain -LIGATURES = ( - # Sentences +SENTENCES_LIGATURES = ( ('ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM', ( '\u0628\u0633\u0645\u0020' '\u0627\u0644\u0644\u0647\u0020' @@ -44,8 +44,9 @@ ('\uFDFA', '', '', '') )), +) - # Words +WORDS_LIGATURES = ( ('ARABIC LIGATURE ALLAH', ( '\u0627\u0644\u0644\u0647', ('\uFDF2', '', '', ''), )), @@ -73,8 +74,9 @@ ('RIAL SIGN', ( '\u0631[\u06CC\u064A]\u0627\u0644', ('\uFDFC', '', '', ''), )), +) - # Letters +LETTERS_LIGATURES = ( ('ARABIC LIGATURE AIN WITH ALEF MAKSURA', ( '\u0639\u0649', ('\uFCF7', '', '', '\uFD13'), )), @@ -927,3 +929,5 @@ '\u0638\u0645', ('\uFC28', '\uFCB9', 
'\uFD3B', ''), )), ) + +LIGATURES = tuple(chain(SENTENCES_LIGATURES, WORDS_LIGATURES, LETTERS_LIGATURES)) diff --git a/arabic_reshaper/reshaper_config.py b/arabic_reshaper/reshaper_config.py new file mode 100644 index 0000000..22e1f23 --- /dev/null +++ b/arabic_reshaper/reshaper_config.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- + +# This work is licensed under the MIT License. +# To view a copy of this license, visit https://opensource.org/licenses/MIT + +# Written by Abdullah Diab (mpcabd) +# Email: mpcabd@gmail.com +# Website: http://mpcabd.xyz + +from __future__ import unicode_literals + +import os + +from configparser import ConfigParser +from pkg_resources import resource_filename + +from .letters import (UNSHAPED, ISOLATED, LETTERS_ARABIC) +from .ligatures import (SENTENCES_LIGATURES, + WORDS_LIGATURES, + LETTERS_LIGATURES) + +try: + from fontTools.ttLib import TTFont + with_font_config = True +except ImportError: + with_font_config = False + +ENABLE_NO_LIGATURES = 0b000 +ENABLE_SENTENCES_LIGATURES = 0b001 +ENABLE_WORDS_LIGATURES = 0b010 +ENABLE_LETTERS_LIGATURES = 0b100 +ENABLE_ALL_LIGATURES = 0b111 + + +def auto_config(configuration=None, configuration_file=None): + configuration_files = [ + resource_filename(__name__, 'default-config.ini') + ] + + if not os.path.exists(configuration_files[0]): + raise Exception( + ('Default configuration file {} not found,' + + ' check the module installation.').format( + configuration_files[0], + ) + ) + + loaded_from_envvar = False + + if not configuration_file: + configuration_file = os.getenv( + 'PYTHON_ARABIC_RESHAPER_CONFIGURATION_FILE' + ) + if configuration_file: + loaded_from_envvar = True + + if configuration_file: + if not os.path.exists(configuration_file): + raise Exception( + 'Configuration file {} not found{}.'.format( + configuration_file, + loaded_from_envvar and ( + ' it is set in your environment variable ' + + 'PYTHON_ARABIC_RESHAPER_CONFIGURATION_FILE' + ) or '' + ) + ) + 
configuration_files.append(configuration_file) + + configuration_parser = ConfigParser() + configuration_parser.read( + configuration_files + ) + + if configuration: + configuration_parser.read_dict({ + 'ArabicReshaper': configuration + }) + + if 'ArabicReshaper' not in configuration_parser: + raise ValueError( + 'Invalid configuration: ' + 'A section with the name ArabicReshaper was not found' + ) + + return configuration_parser['ArabicReshaper'] + + +def config_for_true_type_font(font_file_path, + ligatures_config=ENABLE_ALL_LIGATURES): + if not with_font_config: + raise Exception('fonttools not installed, ' + + 'install it then rerun this.\n' + + '$ pip install arabic-reshaper[with-fonttools]') + if not font_file_path or not os.path.exists(font_file_path): + raise Exception('Invalid path to font file') + ttfont = TTFont(font_file_path) + has_isolated = True + for k, v in LETTERS_ARABIC.items(): + for table in ttfont['cmap'].tables: + if ord(v[ISOLATED]) in table.cmap: + break + else: + has_isolated = False + break + + configuration = { + 'use_unshaped_instead_of_isolated': not has_isolated, + } + + def process_ligatures(ligatures): + for ligature in ligatures: + forms = list(filter(lambda form: form != '', ligature[1][1])) + n = len(forms) + for form in forms: + for table in ttfont['cmap'].tables: + if ord(form) in table.cmap: + n -= 1 + break + configuration[ligature[0]] = (n == 0) + + if ENABLE_SENTENCES_LIGATURES & ligatures_config: + process_ligatures(SENTENCES_LIGATURES) + + if ENABLE_WORDS_LIGATURES & ligatures_config: + process_ligatures(WORDS_LIGATURES) + + if ENABLE_LETTERS_LIGATURES & ligatures_config: + process_ligatures(LETTERS_LIGATURES) + + return configuration diff --git a/meta.yaml b/meta.yaml index 27d070a..bfa0ecf 100644 --- a/meta.yaml +++ b/meta.yaml @@ -1,28 +1,33 @@ {% set name = "arabic-reshaper" %} {% set pypi_name = "arabic_reshaper" %} -{% set version = "2.0.15" %} +{% set version = "2.1.0" %} package: name: "{{ name|lower }}" version: 
"{{ version }}" source: - url: "https://pypi.io/packages/source/{{ pypi_name[0] }}/{{ pypi_name }}/{{ pypi_name }}-{{ version }}.tar.gz" - sha256: 3f3078db2a9a1c4f994145895e9193b1c01a1186547cea303e6fa920e9a00f0a + git_url: https://github.com/mpcabd/python-arabic-reshaper.git + git_rev: "{{ version }}" + git_depth: 1 build: number: 0 - script: "{{ PYTHON }} -m pip install . -vv" + script: "{{ PYTHON }} -m pip install .[with-fonttools] -vv" requirements: host: - - configparser; # [ py < 3 ] + - configparser; # [ py < 3 ] + - fonttools >=4.0 # [ py > 3 ] + - fonttools >=3.0,<4.0 # [ py < 3 ] - future - pip - python - setuptools run: - - configparser; # [ py < 3 ] + - configparser; # [ py < 3 ] + - fonttools >=4.0 # [ py > 3 ] + - fonttools >=3.0,<4.0 # [ py < 3 ] - future - python - setuptools @@ -35,7 +40,6 @@ about: home: "https://mpcabd.xyz/python-arabic-text-reshaper/" license: MIT license_family: MIT - license_file: summary: "Reconstruct Arabic sentences to be used in applications that don't support Arabic" doc_url: "https://github.com/mpcabd/python-arabic-reshaper/" dev_url: "https://github.com/mpcabd/python-arabic-reshaper/" diff --git a/setup.py b/setup.py index 876b9d3..e94d4e3 100755 --- a/setup.py +++ b/setup.py @@ -24,6 +24,10 @@ install_requires=['configparser; python_version <"3"', 'future', 'setuptools'], + extras_require={ + 'with-fonttools': ['fonttools>=4.0; python_version >="3"', + 'fonttools>=3.0,<4.0; python_version <"3"'] + }, author="Abdullah Diab", author_email="mpcabd@gmail.com", maintainer="Abdullah Diab", @@ -40,8 +44,9 @@ "Natural Language :: Arabic", "Operating System :: OS Independent", "Programming Language :: Python :: 2.7", - "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", "Topic :: Software Development :: Libraries :: Python Modules", ], - ) +) diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..9f5c1bc 
--- /dev/null +++ b/tox.ini @@ -0,0 +1,7 @@ +[tox] +envlist = py27,py36,py37,py38 + +[testenv] +deps = pytest +commands = + pytest \ No newline at end of file