Skip to content

Commit

Permalink
v2.1.0 (#44)
Browse files Browse the repository at this point in the history
* initial work

* fix CI

* Work done

* Fix README
  • Loading branch information
mpcabd authored Jun 27, 2020
1 parent c3c27eb commit e27f14f
Show file tree
Hide file tree
Showing 11 changed files with 241 additions and 102 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ wheels/
*.egg-info/
.installed.cfg
*.egg
.tox/

# Installer logs
pip-log.txt
Expand Down
27 changes: 17 additions & 10 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
# https://travis-ci.org/mpcabd/python-arabic-reshaper
dist: xenial
language: python
python:
- "2.7"
- "3.5"
- "3.6"
- "3.7"
install:
- "pip install -e ."
script:
- "python setup.py test"
- pip install tox
- pip install -e .
matrix:
include:
- python: 2.7
env:
- TOX_ENV=py27
- python: 3.6
env:
- TOX_ENV=py36
- python: 3.7
env:
- TOX_ENV=py37
- python: 3.8
env:
- TOX_ENV=py38
script: tox -e $TOX_ENV
47 changes: 45 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,45 @@ define an environment variable with the name
to the configuration file. This way the reshape function will pick it up
automatically, and you won't have to change your old code.

## Settings based on a TrueType® font

If you intend to render the text in a TrueType® font, you can tell the library
to generate its configuration by reading the font file to figure out what's
supported in the font and what's not.

To use this feature you need to install the library with an extra option
(not necessary when you install it with conda):

pip install --upgrade arabic-reshaper[with-fonttools]

Then you can use the reshaper like this:

```python
import arabic_reshaper

reshaper = arabic_reshaper.ArabicReshaper(
arabic_reshaper.config_for_true_type_font(
'/path/to/true-type-font.ttf',
arabic_reshaper.ENABLE_ALL_LIGATURES
)
)
```

This will parse the font file, figure out which ligatures it supports and enable them,
and determine whether the font has isolated forms or whether
`use_unshaped_instead_of_isolated` should be enabled.

The second parameter to `config_for_true_type_font` can be one of:

- `ENABLE_NO_LIGATURES`
- `ENABLE_SENTENCES_LIGATURES`
- `ENABLE_WORDS_LIGATURES`
- `ENABLE_LETTERS_LIGATURES`
- `ENABLE_ALL_LIGATURES` (default)

This parameter controls which ligatures to look for, depending on your usage.
See [default-config.ini](default-config.ini) for the list of available ligatures.

## Tashkeel/Harakat issue

[Harakat or Tashkeel](http://en.wikipedia.org/wiki/Arabic_diacritics#Tashkil_.28marks_used_as_phonetic_guides.29)
Expand All @@ -209,6 +248,10 @@ https://github.com/mpcabd/python-arabic-reshaper/tarball/master

## Version History

### 2.1.0

* Added support for settings based on a TrueType® font

### 2.0.14

* New option `use_unshaped_instead_of_isolated` to get around some fonts missing the isolated form for letters.
Expand Down Expand Up @@ -290,8 +333,8 @@ to Python.
## Contact

Abdullah Diab (mpcabd)
Email: [email protected]
Blog: http://mpcabd.xyz
Email: [email protected]
Blog: http://mpcabd.xyz

For more info visit my blog
[post here](http://mpcabd.xyz/python-arabic-text-reshaper/)
6 changes: 6 additions & 0 deletions arabic_reshaper/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
import os

from .arabic_reshaper import reshape, default_reshaper, ArabicReshaper
from .reshaper_config import (config_for_true_type_font,
ENABLE_NO_LIGATURES,
ENABLE_SENTENCES_LIGATURES,
ENABLE_WORDS_LIGATURES,
ENABLE_LETTERS_LIGATURES,
ENABLE_ALL_LIGATURES)


exec(open(os.path.join(os.path.dirname(__file__), '__version__.py')).read())
2 changes: 1 addition & 1 deletion arabic_reshaper/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '2.0.15'
__version__ = '2.1.0'
83 changes: 7 additions & 76 deletions arabic_reshaper/arabic_reshaper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,30 +7,14 @@
# Email: [email protected]
# Website: http://mpcabd.xyz

# Ported and tweaked from Java to Python, from Better Arabic Reshaper
# [https://github.com/agawish/Better-Arabic-Reshaper/]

# Usage:
# Install python-bidi [https://github.com/MeirKriheli/python-bidi], can be
# installed from pip `pip install python-bidi`.

# import arabic_reshaper
# from bidi.algorithm import get_display
# reshaped_text = arabic_reshaper.reshape('اللغة العربية رائعة')
# bidi_text = get_display(reshaped_text)
# Now you can pass `bidi_text` to any function that handles
# displaying/printing of the text, like writing it to PIL Image or passing it
# to a PDF generating method.
from __future__ import unicode_literals

import re
import os

from configparser import ConfigParser
from itertools import repeat
from pkg_resources import resource_filename

from .ligatures import LIGATURES
from .reshaper_config import auto_config
from .letters import (UNSHAPED, ISOLATED, TATWEEL, ZWJ, LETTERS_ARABIC,
LETTERS_ARABIC_V2, LETTERS_KURDISH, FINAL,
INITIAL, MEDIAL, connects_with_letters_before_and_after,
Expand Down Expand Up @@ -73,72 +57,19 @@ class ArabicReshaper(object):
See the default configuration file :file:`default-config.ini` for details
on how to configure your reshaper.
"""

def __init__(self, configuration=None, configuration_file=None):
super(ArabicReshaper, self).__init__()

configuration_files = [
resource_filename(__name__, 'default-config.ini')
]

if not os.path.exists(configuration_files[0]):
raise Exception(
('Default configuration file {} not found,' +
' check the module installation.').format(
configuration_files[0],
)
)

loaded_from_envvar = False

if not configuration_file:
configuration_file = os.getenv(
'PYTHON_ARABIC_RESHAPER_CONFIGURATION_FILE'
)
if configuration_file:
loaded_from_envvar = True

if configuration_file:
if not os.path.exists(configuration_file):
raise Exception(
'Configuration file {} not found{}.'.format(
configuration_file,
loaded_from_envvar and (
' it is set in your environment variable ' +
'PYTHON_ARABIC_RESHAPER_CONFIGURATION_FILE'
) or ''
)
)
configuration_files.append(configuration_file)

configuration_parser = ConfigParser()
configuration_parser.read(
configuration_files
)

if configuration:
configuration_parser.read_dict({
'ArabicReshaper': configuration
})

if 'ArabicReshaper' not in configuration_parser:
raise ValueError(
'Invalid configuration: '
'A section with the name ArabicReshaper was not found'
)

configuration = configuration_parser['ArabicReshaper']
self.configuration = configuration
self.configuration = auto_config(configuration, configuration_file)
self.language = self.configuration.get('language')


if self.language == 'ArabicV2':
self.letters = LETTERS_ARABIC_V2
elif self.language == 'Kurdish':
self.letters = LETTERS_KURDISH
else:
self.letters = LETTERS_ARABIC



@property
def _ligatures_re(self):
Expand Down Expand Up @@ -215,15 +146,15 @@ def reshape(self, text):
previous_letter = output[-1]
if previous_letter[FORM] == NOT_SUPPORTED:
output.append((letter, isolated_form))
elif not connects_with_letter_before(letter,self.letters):
elif not connects_with_letter_before(letter, self.letters):
output.append((letter, isolated_form))
elif not connects_with_letter_after(
previous_letter[LETTER],self.letters):
previous_letter[LETTER], self.letters):
output.append((letter, isolated_form))
elif (previous_letter[FORM] == FINAL and not
connects_with_letters_before_and_after(
previous_letter[LETTER],self.letters
)):
previous_letter[LETTER], self.letters
)):
output.append((letter, isolated_form))
elif previous_letter[FORM] == isolated_form:
output[-1] = (
Expand Down
12 changes: 8 additions & 4 deletions arabic_reshaper/ligatures.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
# This way we make sure we replace the longest ligatures first

from __future__ import unicode_literals
from itertools import chain

LIGATURES = (
# Sentences
SENTENCES_LIGATURES = (
('ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM', (
'\u0628\u0633\u0645\u0020'
'\u0627\u0644\u0644\u0647\u0020'
Expand All @@ -44,8 +44,9 @@

('\uFDFA', '', '', '')
)),
)

# Words
WORDS_LIGATURES = (
('ARABIC LIGATURE ALLAH', (
'\u0627\u0644\u0644\u0647', ('\uFDF2', '', '', ''),
)),
Expand Down Expand Up @@ -73,8 +74,9 @@
('RIAL SIGN', (
'\u0631[\u06CC\u064A]\u0627\u0644', ('\uFDFC', '', '', ''),
)),
)

# Letters
LETTERS_LIGATURES = (
('ARABIC LIGATURE AIN WITH ALEF MAKSURA', (
'\u0639\u0649', ('\uFCF7', '', '', '\uFD13'),
)),
Expand Down Expand Up @@ -927,3 +929,5 @@
'\u0638\u0645', ('\uFC28', '\uFCB9', '\uFD3B', ''),
)),
)

LIGATURES = tuple(chain(SENTENCES_LIGATURES, WORDS_LIGATURES, LETTERS_LIGATURES))
Loading

0 comments on commit e27f14f

Please sign in to comment.