Skip to content

Commit

Permalink
Merge pull request #13 from trufont/use-pep393-unicode-api
Browse files Browse the repository at this point in the history
 remove use of legacy Py_UNICODE; use PEP393 unicode strings
  • Loading branch information
anthrotype authored Dec 13, 2018
2 parents 57ac02c + 5f7d109 commit 395e256
Show file tree
Hide file tree
Showing 6 changed files with 236 additions and 31 deletions.
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
zip_safe=False,
setup_requires=["setuptools_scm"],
cmake_args=cmake_args,
python_requires=">=3.5",
)


Expand Down
69 changes: 40 additions & 29 deletions src/uharfbuzz/_harfbuzz.pyx
Original file line number Diff line number Diff line change
@@ -1,13 +1,24 @@
#cython: language_level=3
from charfbuzz cimport *
from cpython.unicode cimport PyUnicode_AS_UNICODE, PyUnicode_GET_SIZE
from libc.stdint cimport uint16_t, uint32_t
from libc.stdlib cimport free, malloc
from libc.string cimport const_char
from typing import Callable, Dict, List, Tuple


cdef bint PY_NARROW_UNICODE = sizeof(Py_UNICODE) != 4
cdef extern from "Python.h":
    # PEP 393 (flexible string representation) accessors.
    # Whether the string is in its canonical representation.
    bint PyUnicode_IS_READY(object u)
    # Length of the string in code points.
    Py_ssize_t PyUnicode_GET_LENGTH(object u)
    # One of the PyUnicode_*BYTE_KIND constants declared below; it tells
    # how many bytes each stored code unit occupies.
    int PyUnicode_KIND(object u)
    # Untyped pointer to the raw character buffer.
    void* PyUnicode_DATA(object u)
    ctypedef uint8_t Py_UCS1
    ctypedef uint16_t Py_UCS2
    # The typed accessors yield a pointer to the first code unit of the
    # buffer (not a single code unit), so they are declared as returning
    # pointers. Callers may still cast the result to the matching
    # fixed-width integer pointer type.
    Py_UCS1* PyUnicode_1BYTE_DATA(object u)
    Py_UCS2* PyUnicode_2BYTE_DATA(object u)
    Py_UCS4* PyUnicode_4BYTE_DATA(object u)
    int PyUnicode_1BYTE_KIND
    int PyUnicode_2BYTE_KIND
    int PyUnicode_4BYTE_KIND


cdef class GlyphInfo:
Expand Down Expand Up @@ -151,13 +162,9 @@ cdef class Buffer:
self._hb_buffer, hb_script_from_string(cstr, -1))

def add_codepoints(self, codepoints: List[int],
item_offset: int = None, item_length: int = None) -> None:
item_offset: int = 0, item_length: int = -1) -> None:
cdef unsigned int size = len(codepoints)
cdef hb_codepoint_t* hb_codepoints
if item_offset is None:
item_offset = 0
if item_length is None:
item_length = size
if not size:
hb_codepoints = NULL
else:
Expand All @@ -171,40 +178,44 @@ cdef class Buffer:
free(hb_codepoints)

def add_utf8(self, text: bytes,
item_offset: int = None, item_length: int = None) -> None:
cdef unsigned int size = len(text)
if item_offset is None:
item_offset = 0
if item_length is None:
item_length = size
cdef char* cstr = text
item_offset: int = 0, item_length: int = -1) -> None:
hb_buffer_add_utf8(
self._hb_buffer, cstr, size, item_offset, item_length)
self._hb_buffer, text, len(text), item_offset, item_length)

def add_str(self, text: str,
item_offset: int = None, item_length: int = None) -> None:
cdef Py_UNICODE* array = PyUnicode_AS_UNICODE(text)
cdef Py_ssize_t size = PyUnicode_GET_SIZE(text)
if item_offset is None:
item_offset = 0
if item_length is None:
item_length = size
if PY_NARROW_UNICODE:
item_offset: int = 0, item_length: int = -1) -> None:
# ensure unicode string is in the "canonical" representation
assert PyUnicode_IS_READY(text)

cdef Py_ssize_t length = PyUnicode_GET_LENGTH(text)
cdef int kind = PyUnicode_KIND(text)

if kind == PyUnicode_1BYTE_KIND:
hb_buffer_add_latin1(
self._hb_buffer,
<uint8_t*>PyUnicode_1BYTE_DATA(text),
length,
item_offset,
item_length,
)
elif kind == PyUnicode_2BYTE_KIND:
hb_buffer_add_utf16(
self._hb_buffer,
<uint16_t*>array,
size,
<uint16_t*>PyUnicode_2BYTE_DATA(text),
length,
item_offset,
item_length,
)
else:
elif kind == PyUnicode_4BYTE_KIND:
hb_buffer_add_utf32(
self._hb_buffer,
<uint32_t*>array,
size,
<uint32_t*>PyUnicode_4BYTE_DATA(text),
length,
item_offset,
item_length,
)
else:
raise AssertionError(kind)

def guess_segment_properties(self) -> None:
hb_buffer_guess_segment_properties(self._hb_buffer)
Expand Down
6 changes: 5 additions & 1 deletion src/uharfbuzz/charfbuzz.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from libc.stdint cimport uint16_t, uint32_t
from libc.stdint cimport uint8_t, uint16_t, uint32_t


cdef extern from "hb.h":
Expand Down Expand Up @@ -86,6 +86,10 @@ cdef extern from "hb.h":
hb_buffer_t* buffer,
const hb_codepoint_t* text, int text_length,
unsigned int item_offset, int item_length)
void hb_buffer_add_latin1(
hb_buffer_t* buffer,
const uint8_t* text, int text_length,
unsigned int item_offset, int item_length)
void hb_buffer_add_utf8(
hb_buffer_t* buffer,
const char* text, int text_length,
Expand Down
Binary file added tests/data/AdobeBlank.subset.ttf
Binary file not shown.
92 changes: 92 additions & 0 deletions tests/data/LICENSE_AdobeBlank.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
This Font Software is licensed under the SIL Open Font License,
Version 1.1.

This license is copied below, and is also available with a FAQ at:
http://scripts.sil.org/OFL

-----------------------------------------------------------
SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
-----------------------------------------------------------

PREAMBLE
The goals of the Open Font License (OFL) are to stimulate worldwide
development of collaborative font projects, to support the font
creation efforts of academic and linguistic communities, and to
provide a free and open framework in which fonts may be shared and
improved in partnership with others.

The OFL allows the licensed fonts to be used, studied, modified and
redistributed freely as long as they are not sold by themselves. The
fonts, including any derivative works, can be bundled, embedded,
redistributed and/or sold with any software provided that any reserved
names are not used by derivative works. The fonts and derivatives,
however, cannot be released under any other type of license. The
requirement for fonts to remain under this license does not apply to
any document created using the fonts or their derivatives.

DEFINITIONS
"Font Software" refers to the set of files released by the Copyright
Holder(s) under this license and clearly marked as such. This may
include source files, build scripts and documentation.

"Reserved Font Name" refers to any names specified as such after the
copyright statement(s).

"Original Version" refers to the collection of Font Software
components as distributed by the Copyright Holder(s).

"Modified Version" refers to any derivative made by adding to,
deleting, or substituting -- in part or in whole -- any of the
components of the Original Version, by changing formats or by porting
the Font Software to a new environment.

"Author" refers to any designer, engineer, programmer, technical
writer or other person who contributed to the Font Software.

PERMISSION & CONDITIONS
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Font Software, to use, study, copy, merge, embed,
modify, redistribute, and sell modified and unmodified copies of the
Font Software, subject to the following conditions:

1) Neither the Font Software nor any of its individual components, in
Original or Modified Versions, may be sold by itself.

2) Original or Modified Versions of the Font Software may be bundled,
redistributed and/or sold with any software, provided that each copy
contains the above copyright notice and this license. These can be
included either as stand-alone text files, human-readable headers or
in the appropriate machine-readable metadata fields within text or
binary files as long as those fields can be easily viewed by the user.

3) No Modified Version of the Font Software may use the Reserved Font
Name(s) unless explicit written permission is granted by the
corresponding Copyright Holder. This restriction only applies to the
primary font name as presented to the users.

4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
Software shall not be used to promote, endorse or advertise any
Modified Version, except to acknowledge the contribution(s) of the
Copyright Holder(s) and the Author(s) or with their explicit written
permission.

5) The Font Software, modified or unmodified, in part or in whole,
must be distributed entirely under this license, and must not be
distributed under any other license. The requirement for fonts to
remain under this license does not apply to any document created using
the Font Software.

TERMINATION
This license becomes null and void if any of the above conditions are
not met.

DISCLAIMER
THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
OTHER DEALINGS IN THE FONT SOFTWARE.
99 changes: 98 additions & 1 deletion tests/test_uharfbuzz.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,107 @@
import uharfbuzz as hb
from pathlib import Path
import pytest


class TestBuffer:
# Directory next to this test module that holds the test data files.
TESTDATA = Path(__file__).parent.joinpath("data")
# Raw bytes of the subsetted AdobeBlank font, read once when the module
# is imported.
ADOBE_BLANK_TTF = TESTDATA.joinpath("AdobeBlank.subset.ttf").read_bytes()


@pytest.fixture
def blankfont():
    """Return a subset of AdobeBlank.ttf containing the following glyphs/characters:
    [
        {gid=0, name=".notdef"},
        {gid=1, name="a", code=0x61},
        {gid=2, name="b", code=0x62},
        {gid=3, name="c", code=0x63},
        {gid=4, name="d", code=0x64},
        {gid=5, name="e", code=0x65},
        {gid=6, name="ccedilla", code=0xE7},  # LATIN SMALL LETTER C WITH CEDILLA
        {gid=7, name="uni0431", code=0x0431},  # CYRILLIC SMALL LETTER BE
        {gid=8, name="u1F4A9", code=0x1F4A9},  # PILE OF POO
    ]

    The font is built from the in-memory ``ADOBE_BLANK_TTF`` bytes, its scale
    is set to the face's units-per-em on both axes, and
    ``hb.ot_font_set_funcs`` is applied to it before it is returned.
    """
    face = hb.Face(ADOBE_BLANK_TTF)
    font = hb.Font(face)
    upem = face.upem
    # Use the same value for the horizontal and the vertical scale.
    font.scale = (upem, upem)
    hb.ot_font_set_funcs(font)
    return font


class TestBuffer:
    """Tests for creating ``hb.Buffer`` objects, filling them and reading
    back their contents and segment properties."""

    def test_init(self):
        """A buffer can be built by calling the class directly."""
        buffer = hb.Buffer()

    def test_create(self):
        """A buffer can be built through the ``create`` constructor."""
        buffer = hb.Buffer.create()

    @pytest.mark.parametrize(
        "string, expected",
        [
            pytest.param(
                "abcde",
                [(0x61, 0), (0x62, 1), (0x63, 2), (0x64, 3), (0x65, 4)],
                id="ascii",
            ),
            pytest.param(
                "abçde",
                [(0x61, 0), (0x62, 1), (0xE7, 2), (0x64, 3), (0x65, 4)],
                id="latin1",
            ),
            pytest.param(
                "aбcde",
                [(0x61, 0), (0x431, 1), (0x63, 2), (0x64, 3), (0x65, 4)],
                id="ucs2",
            ),
            pytest.param(
                "abc💩e",
                [(0x61, 0), (0x62, 1), (0x63, 2), (0x1F4A9, 3), (0x65, 4)],
                id="ucs4",
            ),
        ],
    )
    def test_add_str(self, string, expected):
        """``add_str`` stores one (code point, cluster) entry per character,
        for strings needing 1, 2 or 4 bytes per character."""
        buffer = hb.Buffer()
        buffer.add_str(string)
        observed = [(info.codepoint, info.cluster) for info in buffer.glyph_infos]
        assert observed == expected

    def test_add_utf8(self):
        """``add_utf8`` decodes UTF-8 bytes; the expected cluster values are
        the byte offsets of each character in the encoded input."""
        buffer = hb.Buffer()
        encoded = "aбç💩e".encode("utf-8")
        buffer.add_utf8(encoded)
        observed = [(info.codepoint, info.cluster) for info in buffer.glyph_infos]
        assert observed == [(0x61, 0), (0x431, 1), (0xE7, 3), (0x1F4A9, 5), (0x65, 9)]

    def test_add_codepoints(self):
        """``add_codepoints`` stores the given integers with consecutive
        cluster indices."""
        buffer = hb.Buffer()
        codepoints = [0x61, 0x431, 0xE7, 0x1F4A9, 0x65]
        buffer.add_codepoints(codepoints)
        observed = [(info.codepoint, info.cluster) for info in buffer.glyph_infos]
        assert observed == [(0x61, 0), (0x431, 1), (0xE7, 2), (0x1F4A9, 3), (0x65, 4)]

    def test_guess_set_segment_properties(self):
        """Segment properties can be guessed from the text and then
        overridden one by one."""
        buffer = hb.Buffer()
        buffer.add_str("הארץ")

        buffer.guess_segment_properties()

        assert buffer.direction == "rtl"
        assert buffer.script == "Hebr"
        # the guessed language seems to be locale specific
        # assert buffer.language == "en-us"
        assert buffer.language

        # Each property must read back exactly the value assigned to it.
        overrides = (
            ("direction", "ltr"),
            ("script", "Latn"),
            ("language", "he-il"),
        )
        for attribute, value in overrides:
            setattr(buffer, attribute, value)
            assert getattr(buffer, attribute) == value


class TestShape:
    """Tests for ``hb.shape`` run against the ``blankfont`` fixture."""

    @pytest.mark.parametrize(
        "string, expected",
        [
            pytest.param(
                "abcde", [(1, 0), (2, 1), (3, 2), (4, 3), (5, 4)], id="ascii"
            ),
            pytest.param(
                "abçde", [(1, 0), (2, 1), (6, 2), (4, 3), (5, 4)], id="latin1"
            ),
            pytest.param(
                "aбcde", [(1, 0), (7, 1), (3, 2), (4, 3), (5, 4)], id="ucs2"
            ),
            pytest.param(
                "abc💩e", [(1, 0), (2, 1), (3, 2), (8, 3), (5, 4)], id="ucs4"
            ),
        ],
    )
    def test_gid_and_cluster_no_features(self, blankfont, string, expected):
        """After shaping, ``codepoint`` is compared against the expected
        glyph id and ``cluster`` against the character index."""
        buffer = hb.Buffer()
        buffer.add_str(string)
        buffer.guess_segment_properties()
        hb.shape(blankfont, buffer)
        observed = [(info.codepoint, info.cluster) for info in buffer.glyph_infos]
        assert observed == expected

0 comments on commit 395e256

Please sign in to comment.