-
Notifications
You must be signed in to change notification settings - Fork 82
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* initial work * fix CI * Work done * Fix README
- Loading branch information
Showing
11 changed files
with
241 additions
and
102 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,6 +24,7 @@ wheels/ | |
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
.tox/ | ||
|
||
# Installer logs | ||
pip-log.txt | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,19 @@ | ||
# https://travis-ci.org/mpcabd/python-arabic-reshaper | ||
dist: xenial | ||
language: python | ||
python: | ||
- "2.7" | ||
- "3.5" | ||
- "3.6" | ||
- "3.7" | ||
install: | ||
- "pip install -e ." | ||
script: | ||
- "python setup.py test" | ||
- pip install tox | ||
- pip install -e . | ||
matrix: | ||
include: | ||
- python: 2.7 | ||
env: | ||
- TOX_ENV=py27 | ||
- python: 3.6 | ||
env: | ||
- TOX_ENV=py36 | ||
- python: 3.7 | ||
env: | ||
- TOX_ENV=py37 | ||
- python: 3.8 | ||
env: | ||
- TOX_ENV=py38 | ||
script: tox -e $TOX_ENV |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -187,6 +187,45 @@ define an environment variable with the name | |
to the configuration file. This way the reshape function will pick it | ||
automatically, and you won't have to change your old code. | ||
|
||
## Settings based on a TrueType® font | ||
|
||
If you intend to render the text in a TrueType® font, you can tell the library | ||
to generate its configuration by reading the font file to figure out what's | ||
supported in the font and what's not. | ||
|
||
To use this feature you need to install the library with an extra option | ||
(not necessary when you install it with conda): | ||
|
||
pip install --upgrade arabic-reshaper[with-fonttools] | ||
|
||
Then you can use the reshaper like this: | ||
|
||
```python | ||
import arabic_reshaper | ||
|
||
reshaper = arabic_reshaper.ArabicReshaper( | ||
arabic_reshaper.config_for_true_type_font( | ||
'/path/to/true-type-font.ttf', | ||
arabic_reshaper.ENABLE_ALL_LIGATURES | ||
) | ||
) | ||
``` | ||
|
||
This will parse the font file, figure out which ligatures the font supports and enable them, | ||
and determine whether the font has isolated forms or whether `use_unshaped_instead_of_isolated` | ||
should be enabled. | ||
|
||
The second parameter to `config_for_true_type_font` can be one of: | ||
|
||
- `ENABLE_NO_LIGATURES` | ||
- `ENABLE_SENTENCES_LIGATURES` | ||
- `ENABLE_WORDS_LIGATURES` | ||
- `ENABLE_LETTERS_LIGATURES` | ||
- `ENABLE_ALL_LIGATURES` (default) | ||
|
||
This parameter controls which ligatures to look for, depending on your usage; | ||
see [default-config.ini](default-config.ini) for the list of available ligatures. | ||
|
||
## Tashkeel/Harakat issue | ||
|
||
[Harakat or Tashkeel](http://en.wikipedia.org/wiki/Arabic_diacritics#Tashkil_.28marks_used_as_phonetic_guides.29) | ||
|
@@ -209,6 +248,10 @@ https://github.com/mpcabd/python-arabic-reshaper/tarball/master | |
|
||
## Version History | ||
|
||
### 2.1.0 | ||
|
||
* Added support for settings based on a TrueType® font | ||
|
||
### 2.0.14 | ||
|
||
* New option `use_unshaped_instead_of_isolated` to get around some fonts missing the isolated form for letters. | ||
|
@@ -290,8 +333,8 @@ to Python. | |
## Contact | ||
|
||
Abdullah Diab (mpcabd) | ||
Email: [email protected] | ||
Blog: http://mpcabd.xyz | ||
Email: [email protected] | ||
Blog: http://mpcabd.xyz | ||
|
||
For more info visit my blog | ||
[post here](http://mpcabd.xyz/python-arabic-text-reshaper/) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,12 @@ | ||
import os | ||
|
||
from .arabic_reshaper import reshape, default_reshaper, ArabicReshaper | ||
from .reshaper_config import (config_for_true_type_font, | ||
ENABLE_NO_LIGATURES, | ||
ENABLE_SENTENCES_LIGATURES, | ||
ENABLE_WORDS_LIGATURES, | ||
ENABLE_LETTERS_LIGATURES, | ||
ENABLE_ALL_LIGATURES) | ||
|
||
|
||
exec(open(os.path.join(os.path.dirname(__file__), '__version__.py')).read()) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
__version__ = '2.0.15' | ||
__version__ = '2.1.0' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,30 +7,14 @@ | |
# Email: [email protected] | ||
# Website: http://mpcabd.xyz | ||
|
||
# Ported and tweaked from Java to Python, from Better Arabic Reshaper | ||
# [https://github.com/agawish/Better-Arabic-Reshaper/] | ||
|
||
# Usage: | ||
# Install python-bidi [https://github.com/MeirKriheli/python-bidi], can be | ||
# installed from pip `pip install python-bidi`. | ||
|
||
# import arabic_reshaper | ||
# from bidi.algorithm import get_display | ||
# reshaped_text = arabic_reshaper.reshape('اللغة العربية رائعة') | ||
# bidi_text = get_display(reshaped_text) | ||
# Now you can pass `bidi_text` to any function that handles | ||
# displaying/printing of the text, like writing it to PIL Image or passing it | ||
# to a PDF generating method. | ||
from __future__ import unicode_literals | ||
|
||
import re | ||
import os | ||
|
||
from configparser import ConfigParser | ||
from itertools import repeat | ||
from pkg_resources import resource_filename | ||
|
||
from .ligatures import LIGATURES | ||
from .reshaper_config import auto_config | ||
from .letters import (UNSHAPED, ISOLATED, TATWEEL, ZWJ, LETTERS_ARABIC, | ||
LETTERS_ARABIC_V2, LETTERS_KURDISH, FINAL, | ||
INITIAL, MEDIAL, connects_with_letters_before_and_after, | ||
|
@@ -73,72 +57,19 @@ class ArabicReshaper(object): | |
See the default configuration file :file:`default-config.ini` for details | ||
on how to configure your reshaper. | ||
""" | ||
|
||
def __init__(self, configuration=None, configuration_file=None): | ||
super(ArabicReshaper, self).__init__() | ||
|
||
configuration_files = [ | ||
resource_filename(__name__, 'default-config.ini') | ||
] | ||
|
||
if not os.path.exists(configuration_files[0]): | ||
raise Exception( | ||
('Default configuration file {} not found,' + | ||
' check the module installation.').format( | ||
configuration_files[0], | ||
) | ||
) | ||
|
||
loaded_from_envvar = False | ||
|
||
if not configuration_file: | ||
configuration_file = os.getenv( | ||
'PYTHON_ARABIC_RESHAPER_CONFIGURATION_FILE' | ||
) | ||
if configuration_file: | ||
loaded_from_envvar = True | ||
|
||
if configuration_file: | ||
if not os.path.exists(configuration_file): | ||
raise Exception( | ||
'Configuration file {} not found{}.'.format( | ||
configuration_file, | ||
loaded_from_envvar and ( | ||
' it is set in your environment variable ' + | ||
'PYTHON_ARABIC_RESHAPER_CONFIGURATION_FILE' | ||
) or '' | ||
) | ||
) | ||
configuration_files.append(configuration_file) | ||
|
||
configuration_parser = ConfigParser() | ||
configuration_parser.read( | ||
configuration_files | ||
) | ||
|
||
if configuration: | ||
configuration_parser.read_dict({ | ||
'ArabicReshaper': configuration | ||
}) | ||
|
||
if 'ArabicReshaper' not in configuration_parser: | ||
raise ValueError( | ||
'Invalid configuration: ' | ||
'A section with the name ArabicReshaper was not found' | ||
) | ||
|
||
configuration = configuration_parser['ArabicReshaper'] | ||
self.configuration = configuration | ||
self.configuration = auto_config(configuration, configuration_file) | ||
self.language = self.configuration.get('language') | ||
|
||
|
||
if self.language == 'ArabicV2': | ||
self.letters = LETTERS_ARABIC_V2 | ||
elif self.language == 'Kurdish': | ||
self.letters = LETTERS_KURDISH | ||
else: | ||
self.letters = LETTERS_ARABIC | ||
|
||
|
||
|
||
@property | ||
def _ligatures_re(self): | ||
|
@@ -215,15 +146,15 @@ def reshape(self, text): | |
previous_letter = output[-1] | ||
if previous_letter[FORM] == NOT_SUPPORTED: | ||
output.append((letter, isolated_form)) | ||
elif not connects_with_letter_before(letter,self.letters): | ||
elif not connects_with_letter_before(letter, self.letters): | ||
output.append((letter, isolated_form)) | ||
elif not connects_with_letter_after( | ||
previous_letter[LETTER],self.letters): | ||
previous_letter[LETTER], self.letters): | ||
output.append((letter, isolated_form)) | ||
elif (previous_letter[FORM] == FINAL and not | ||
connects_with_letters_before_and_after( | ||
previous_letter[LETTER],self.letters | ||
)): | ||
previous_letter[LETTER], self.letters | ||
)): | ||
output.append((letter, isolated_form)) | ||
elif previous_letter[FORM] == isolated_form: | ||
output[-1] = ( | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.